diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json" --- "a/EXTERNAL_MODEL_RESULTS.json" +++ "b/EXTERNAL_MODEL_RESULTS.json" @@ -1,118 +1,92 @@ { - "LLM2Vec-Meta-Llama-3-supervised": { + "Baichuan-text-embedding": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "AmazonCounterfactualClassification (en)": 79.94, - "AmazonPolarityClassification": 86.07, - "AmazonReviewsClassification (en)": 46.84, - "Banking77Classification": 88.05, - "EmotionClassification": 51.2, - "ImdbClassification": 82.94, - "MTOPDomainClassification (en)": 96.14, - "MTOPIntentClassification (en)": 86.11, - "MassiveIntentClassification (en)": 79.8, - "MassiveScenarioClassification (en)": 81.52, - "ToxicConversationsClassification": 70.59, - "TweetSentimentExtractionClassification": 61.9 + "Model": "Baichuan-text-embedding", + "AmazonReviewsClassification": 48.3, + "IFlyTek": 50.75, + "JDReview": 87.69, + "MassiveIntentClassification": 74.91, + "MassiveScenarioClassification": 81.28, + "MultilingualSentiment": 76.83, + "OnlineShopping": 94.42, + "TNews": 52.62, + "Waimai": 88.77 } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "ArxivClusteringP2P": 44.27, - "ArxivClusteringS2S": 46.85, - "BiorxivClusteringP2P": 32.35, - "BiorxivClusteringS2S": 36.7, - "MedrxivClusteringP2P": 30.71, - "MedrxivClusteringS2S": 32.96, - "RedditClustering": 61.72, - "RedditClusteringP2P": 63.98, - "StackExchangeClustering": 72.74, - "StackExchangeClusteringP2P": 32.26, - "TwentyNewsgroupsClustering": 56.41 + "Model": "Baichuan-text-embedding", + "CLSClusteringP2P": 60.37, + "CLSClusteringS2S": 51.09, + "ThuNewsClusteringP2P": 58.23, + "ThuNewsClusteringS2S": 57.83 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "SprintDuplicateQuestions": 95.09, - "TwitterSemEval2015": 81.73, - "TwitterURLCorpus": 86.56 + "Model": "Baichuan-text-embedding", + "Cmnli": 85.31, + "Ocnli": 79.33 }, { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "SprintDuplicateQuestions": 95.09, - "TwitterSemEval2015": 81.73, - "TwitterURLCorpus": 86.56 + "Model": "Baichuan-text-embedding", + "Cmnli": 85.33, + "Ocnli": 79.37 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "AskUbuntuDupQuestions": 65.19, - "MindSmallReranking": 32.67, - "SciDocsRR": 86.05, - "StackOverflowDupQuestions": 54.82 + "Model": "Baichuan-text-embedding", + "CMedQAv1": 88.06, + "CMedQAv2": 88.46, + "MMarcoReranking": 34.3, + "T2Reranking": 67.85 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "ArguAna": 62.78, - "CQADupstackRetrieval": 48.25, - "ClimateFEVER": 34.27, - "DBPedia": 48.34, - "FEVER": 90.2, - "FiQA2018": 55.33, - "HotpotQA": 71.76, - "MSMARCO": 43.24, - "NFCorpus": 41.83, - "NQ": 64.21, - "QuoraRetrieval": 87.16, - "SCIDOCS": 22.96, - "SciFact": 78.22, - "TRECCOVID": 80.34, - "Touche2020": 20.5 + "Model": "Baichuan-text-embedding", + "CmedqaRetrieval": 47.64, + "CovidRetrieval": 86.86, + "DuRetrieval": 88.43, + "EcomRetrieval": 66.39, + "MMarcoRetrieval": 80.17, + "MedicalRetrieval": 61.1, + "T2Retrieval": 80.11, + "VideoRetrieval": 74.28 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "BIOSSES": 84.92, - "SICK-R": 83.94, - "STS12": 79.27, - "STS13": 84.83, - "STS14": 82.94, - "STS15": 88.09, - "STS16": 86.54, - "STS17 (en-en)": 89.58, - "STS22 (en)": 67.67, - "STSBenchmark": 88.05 + "Model": "Baichuan-text-embedding", + "AFQMC": 50.8, + "ATEC": 53.23, + "BQ": 66.49, + "LCQMC": 76.6, + "PAWSX": 47.56, + "QBQTC": 39.96, + "STS22": 65.78, + "STSB": 80.14 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "LLM2Vec-Meta-Llama-3-supervised", - "SummEval": 30.94 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -121,457 +95,229 @@ "p-MRR": [] } }, - "msmarco-bert-co-condensor": { + "Cohere-embed-english-v3.0": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "msmarco-bert-co-condensor", - "AmazonCounterfactualClassification (en)": 64.06, - "AmazonPolarityClassification": 66.88, - "AmazonReviewsClassification (en)": 34.85, - "Banking77Classification": 82.35, - "EmotionClassification": 41.91, - "ImdbClassification": 60.17, - "MTOPDomainClassification (en)": 91.34, - "MTOPIntentClassification (en)": 71.07, - "MassiveIntentClassification (en)": 70.4, - "MassiveScenarioClassification (en)": 73.73, - "ToxicConversationsClassification": 64.01, - "TweetSentimentExtractionClassification": 55.74 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "msmarco-bert-co-condensor", - "ArxivClusteringP2P": 36.94, - "ArxivClusteringS2S": 29.03, - "BiorxivClusteringP2P": 32.35, - "BiorxivClusteringS2S": 28.16, - "MedrxivClusteringP2P": 30.23, - "MedrxivClusteringS2S": 27.01, - "RedditClustering": 48.04, - "RedditClusteringP2P": 53.53, - "StackExchangeClustering": 59.54, - "StackExchangeClusteringP2P": 30.48, - "TwentyNewsgroupsClustering": 38.68 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "msmarco-bert-co-condensor", - "SprintDuplicateQuestions": 96.09, - "TwitterSemEval2015": 65.95, - "TwitterURLCorpus": 83.17 - }, - { - "Model": "msmarco-bert-co-condensor", - "SprintDuplicateQuestions": 96.09, - "TwitterSemEval2015": 65.95, - "TwitterURLCorpus": 83.17 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "msmarco-bert-co-condensor", - "AskUbuntuDupQuestions": 58.99, - "MindSmallReranking": 27.13, - "SciDocsRR": 72.78, - "StackOverflowDupQuestions": 48.48 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "msmarco-bert-co-condensor", - "ArguAna": 45.15, - "CQADupstackRetrieval": 27.72, - "ClimateFEVER": 16.96, - "DBPedia": 27.86, - "FEVER": 45.68, - "FiQA2018": 15.62, - "HotpotQA": 35.61, - "MSMARCO": 29.57, - "NFCorpus": 22.29, - "NQ": 29.85, - "QuoraRetrieval": 86.51, - "SCIDOCS": 10.13, - "SciFact": 52.31, - "TRECCOVID": 40.54, - "Touche2020": 8.57 + "Model": "Cohere-embed-english-v3.0", + "AILACasedocs": 31.54, + "AILAStatutes": 27.15, + "ARCChallenge": 9.89, + "AlphaNLI": 15.1, + "BrightRetrieval (psychology)": 21.82, + "BrightRetrieval (economics)": 20.18, + "BrightRetrieval (robotics)": 16.21, + "BrightRetrieval (biology)": 18.98, + "BrightRetrieval (stackoverflow)": 16.47, + "BrightRetrieval (theoremqa_theorems)": 7.14, + "BrightRetrieval (pony)": 1.77, + "BrightRetrieval (sustainable_living)": 17.69, + "BrightRetrieval (aops)": 6.46, + "BrightRetrieval (theoremqa_questions)": 15.07, + "BrightRetrieval (leetcode)": 26.78, + "BrightRetrieval (earth_science)": 27.45, + "GerDaLIRSmall": 6.05, + "HellaSwag": 26.35, + "LeCaRDv2": 21.02, + "LegalBenchConsumerContractsQA": 77.12, + "LegalBenchCorporateLobbying": 93.68, + "LegalQuAD": 26.08, + "LegalSummarization": 61.7, + "PIQA": 28.49, + "Quail": 4.1, + "RARbCode": 57.19, + "RARbMath": 72.26, + "SIQA": 4.26, + "SpartQA": 3.75, + "TempReasonL1": 1.5, + "TempReasonL2Fact": 35.91, + "TempReasonL2Pure": 1.89, + "TempReasonL3Fact": 27.51, + "TempReasonL3Pure": 8.53, + "WinoGrande": 58.01 } - ] - }, - "STS": { - "cosine_spearman": [ + ], + "recall_at_1": [ { - "Model": "msmarco-bert-co-condensor", - "BIOSSES": 77.32, - "SICK-R": 72.0, - "STS12": 68.19, - "STS13": 80.4, - "STS14": 74.02, - "STS15": 82.57, - "STS16": 79.78, - "STS17 (en-en)": 85.94, - "STS22 (en)": 67.54, - "STSBenchmark": 76.97 + "Model": "Cohere-embed-english-v3.0", + "BrightRetrieval (robotics)": 9.9, + "BrightRetrieval (psychology)": 20.5, + "BrightRetrieval (biology)": 31.47, + "BrightRetrieval (economics)": 17.96, + "BrightRetrieval (stackoverflow)": 15.81, + "BrightRetrieval (pony)": 0.84, + "BrightRetrieval (sustainable_living)": 15.23, + "BrightRetrieval (earth_science)": 35.49 } ] }, + "STS": { + "cosine_spearman": [] + }, "Summarization": { - "cosine_spearman": [ - { - "Model": "msmarco-bert-co-condensor", - "SummEval": 29.5 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] - } - }, - "LaBSE-en-ru": { - "BitextMining": { - "f1": [ + "p-MRR": [ { - "Model": "LaBSE-en-ru", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.62 + "Model": "Cohere-embed-english-v3.0", + "Core17InstructionRetrieval": 2.8, + "News21InstructionRetrieval": 0.2, + "Robust04InstructionRetrieval": -3.63 } ] + } + }, + "Cohere-embed-english-v3.0-instruct": { + "BitextMining": { + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "LaBSE-en-ru", - "GeoreviewClassification (rus-Cyrl)": 40.89, - "HeadlineClassification (rus-Cyrl)": 68.75, - "InappropriatenessClassification (rus-Cyrl)": 58.48, - "KinopoiskClassification (rus-Cyrl)": 49.85, - "MassiveIntentClassification (swa-Latn)": 19.98, - "MassiveIntentClassification (aze-Latn)": 19.52, - "MassiveIntentClassification (tur-Latn)": 24.12, - "MassiveIntentClassification (cmo-Hans)": 3.96, - "MassiveIntentClassification (amh-Ethi)": 2.76, - "MassiveIntentClassification (kan-Knda)": 2.86, - "MassiveIntentClassification (hin-Deva)": 3.29, - "MassiveIntentClassification (tgl-Latn)": 27.08, - "MassiveIntentClassification (tha-Thai)": 4.0, - "MassiveIntentClassification (swe-Latn)": 32.01, - "MassiveIntentClassification (deu-Latn)": 35.14, - "MassiveIntentClassification (spa-Latn)": 37.67, - "MassiveIntentClassification (por-Latn)": 39.84, - "MassiveIntentClassification (jpn-Jpan)": 4.78, - "MassiveIntentClassification (fin-Latn)": 31.11, - "MassiveIntentClassification (kat-Geor)": 2.87, - "MassiveIntentClassification (slv-Latn)": 35.66, - "MassiveIntentClassification (rus-Cyrl)": 60.53, - "MassiveIntentClassification (ita-Latn)": 43.32, - "MassiveIntentClassification (tel-Telu)": 2.72, - "MassiveIntentClassification (afr-Latn)": 30.59, - "MassiveIntentClassification (isl-Latn)": 25.61, - "MassiveIntentClassification (fas-Arab)": 3.71, - "MassiveIntentClassification (vie-Latn)": 23.0, - "MassiveIntentClassification (ben-Beng)": 3.35, - "MassiveIntentClassification (hye-Armn)": 2.8, - "MassiveIntentClassification (pol-Latn)": 31.3, - "MassiveIntentClassification (cym-Latn)": 26.59, - "MassiveIntentClassification (jav-Latn)": 26.84, - "MassiveIntentClassification (mon-Cyrl)": 35.97, - "MassiveIntentClassification (en)": 60.48, - "MassiveIntentClassification (msa-Latn)": 27.82, - "MassiveIntentClassification (nob-Latn)": 35.78, - "MassiveIntentClassification (heb-Hebr)": 2.33, - "MassiveIntentClassification (khm-Khmr)": 4.6, - "MassiveIntentClassification (nld-Latn)": 34.66, - "MassiveIntentClassification (ind-Latn)": 33.31, - "MassiveIntentClassification (mal-Mlym)": 2.63, - "MassiveIntentClassification (tam-Taml)": 2.22, - "MassiveIntentClassification (mya-Mymr)": 3.57, - "MassiveIntentClassification (urd-Arab)": 3.36, - "MassiveIntentClassification (dan-Latn)": 38.66, - "MassiveIntentClassification (cmo-Hant)": 5.29, - "MassiveIntentClassification (ron-Latn)": 37.45, - "MassiveIntentClassification (lav-Latn)": 23.92, - "MassiveIntentClassification (fra-Latn)": 40.29, - "MassiveIntentClassification (ell-Grek)": 11.14, - "MassiveIntentClassification (sqi-Latn)": 35.84, - "MassiveIntentClassification (hun-Latn)": 26.74, - "MassiveIntentClassification (kor-Kore)": 2.69, - "MassiveIntentClassification (ara-Arab)": 5.19, - "MassiveScenarioClassification (swa-Latn)": 25.61, - "MassiveScenarioClassification (aze-Latn)": 24.48, - "MassiveScenarioClassification (tur-Latn)": 31.38, - "MassiveScenarioClassification (cmo-Hans)": 9.98, - "MassiveScenarioClassification (amh-Ethi)": 7.59, - "MassiveScenarioClassification (kan-Knda)": 8.73, - "MassiveScenarioClassification (hin-Deva)": 8.77, - "MassiveScenarioClassification (tgl-Latn)": 35.12, - "MassiveScenarioClassification (tha-Thai)": 8.69, - "MassiveScenarioClassification (swe-Latn)": 35.83, - "MassiveScenarioClassification (deu-Latn)": 41.72, - "MassiveScenarioClassification (spa-Latn)": 43.33, - "MassiveScenarioClassification (por-Latn)": 44.62, - "MassiveScenarioClassification (jpn-Jpan)": 9.51, - "MassiveScenarioClassification (fin-Latn)": 33.79, - "MassiveScenarioClassification (kat-Geor)": 7.32, - "MassiveScenarioClassification (slv-Latn)": 37.6, - "MassiveScenarioClassification (rus-Cyrl)": 65.15, - "MassiveScenarioClassification (ita-Latn)": 47.28, - "MassiveScenarioClassification (tel-Telu)": 7.53, - "MassiveScenarioClassification (afr-Latn)": 37.27, - "MassiveScenarioClassification (isl-Latn)": 30.32, - "MassiveScenarioClassification (fas-Arab)": 6.83, - "MassiveScenarioClassification (vie-Latn)": 28.92, - "MassiveScenarioClassification (ben-Beng)": 8.57, - "MassiveScenarioClassification (hye-Armn)": 8.91, - "MassiveScenarioClassification (pol-Latn)": 33.75, - "MassiveScenarioClassification (cym-Latn)": 30.38, - "MassiveScenarioClassification (jav-Latn)": 33.94, - "MassiveScenarioClassification (mon-Cyrl)": 41.53, - "MassiveScenarioClassification (en)": 65.43, - "MassiveScenarioClassification (msa-Latn)": 36.28, - "MassiveScenarioClassification (nob-Latn)": 42.43, - "MassiveScenarioClassification (heb-Hebr)": 8.64, - "MassiveScenarioClassification (khm-Khmr)": 9.99, - "MassiveScenarioClassification (nld-Latn)": 41.47, - "MassiveScenarioClassification (ind-Latn)": 39.05, - "MassiveScenarioClassification (mal-Mlym)": 7.24, - "MassiveScenarioClassification (tam-Taml)": 7.71, - "MassiveScenarioClassification (mya-Mymr)": 9.94, - "MassiveScenarioClassification (urd-Arab)": 9.16, - "MassiveScenarioClassification (dan-Latn)": 44.69, - "MassiveScenarioClassification (cmo-Hant)": 10.48, - "MassiveScenarioClassification (ron-Latn)": 44.55, - "MassiveScenarioClassification (lav-Latn)": 26.26, - "MassiveScenarioClassification (fra-Latn)": 45.08, - "MassiveScenarioClassification (ell-Grek)": 19.46, - "MassiveScenarioClassification (sqi-Latn)": 40.9, - "MassiveScenarioClassification (hun-Latn)": 33.92, - "MassiveScenarioClassification (kor-Kore)": 7.37, - "MassiveScenarioClassification (ara-Arab)": 12.43, - "RuReviewsClassification (rus-Cyrl)": 58.01, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.8, - "RuSciBenchOECDClassification (rus-Cyrl)": 40.36 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "LaBSE-en-ru", - "GeoreviewClusteringP2P (rus-Cyrl)": 51.89, - "MLSUMClusteringP2P (rus-Cyrl)": 37.87, - "MLSUMClusteringS2S (rus-Cyrl)": 41.24, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 47.48, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.16 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "LaBSE-en-ru", - "OpusparcusPC (rus-Cyrl)": 87.18, - "TERRa (rus-Cyrl)": 55.61 - }, - { - "Model": "LaBSE-en-ru", - "OpusparcusPC (rus-Cyrl)": 87.18, - "TERRa (rus-Cyrl)": 55.61 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "LaBSE-en-ru", - "MIRACLReranking (rus-Cyrl)": 28.86 - }, - { - "Model": "LaBSE-en-ru", - "RuBQReranking (rus-Cyrl)": 54.83 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LaBSE-en-ru", - "MIRACLRetrieval (rus-Cyrl)": 10.58, - "RiaNewsRetrieval (rus-Cyrl)": 34.73, - "RuBQRetrieval (rus-Cyrl)": 29.03 + "Model": "Cohere-embed-english-v3.0-instruct", + "ARCChallenge": 10.1, + "AlphaNLI": 18.75, + "HellaSwag": 29.02, + "PIQA": 27.89, + "Quail": 7.77, + "RARbCode": 56.56, + "RARbMath": 72.05, + "SIQA": 5.03, + "SpartQA": 3.33, + "TempReasonL1": 1.43, + "TempReasonL2Fact": 40.46, + "TempReasonL2Pure": 2.39, + "TempReasonL3Fact": 33.87, + "TempReasonL3Pure": 7.52, + "WinoGrande": 65.02 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "LaBSE-en-ru", - "RUParaPhraserSTS (rus-Cyrl)": 65.87, - "RuSTSBenchmarkSTS (rus-Cyrl)": 73.32, - "STS22 (deu-Latn)": 38.9, - "STS22 (en)": 59.47, - "STS22 (pol-Latn_eng-Latn)": 58.73, - "STS22 (spa-Latn)": 60.85, - "STS22 (fra-Latn)": 74.98, - "STS22 (deu-Latn_eng-Latn)": 47.98, - "STS22 (deu-Latn_fra-Latn)": 59.4, - "STS22 (deu-Latn_pol-Latn)": 39.48, - "STS22 (pol-Latn)": 32.74, - "STS22 (tur-Latn)": 55.04, - "STS22 (spa-Latn_eng-Latn)": 70.8, - "STS22 (rus-Cyrl)": 58.53, - "STS22 (ita-Latn)": 68.58, - "STS22 (fra-Latn_pol-Latn)": 61.98, - "STS22 (spa-Latn_ita-Latn)": 66.83, - "STS22 (cmn-Hans_eng-Latn)": 24.98, - "STS22 (ara-Arab)": 31.85, - "STS22 (cmn-Hans)": 35.1, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 73.02 - } - ] + "cosine_spearman": [] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "LaBSE-en-ru", - "CEDRClassification (rus-Cyrl)": 40.75, - "SensitiveTopicsClassification (rus-Cyrl)": 21.79 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] } }, - "LLM2Vec-Llama-2-supervised": { + "Cohere-embed-multilingual-light-v3.0": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "AmazonCounterfactualClassification (en)": 82.22, - "AmazonPolarityClassification": 89.69, - "AmazonReviewsClassification (en)": 48.47, - "Banking77Classification": 88.17, - "EmotionClassification": 51.71, - "ImdbClassification": 85.78, - "MTOPDomainClassification (en)": 95.57, - "MTOPIntentClassification (en)": 82.81, - "MassiveIntentClassification (en)": 78.06, - "MassiveScenarioClassification (en)": 81.35, - "ToxicConversationsClassification": 71.01, - "TweetSentimentExtractionClassification": 61.11 + "Model": "Cohere-embed-multilingual-light-v3.0", + "AmazonReviewsClassification": 38.6, + "MTOPDomainClassification": 80.79, + "MTOPIntentClassification": 50.01, + "MasakhaNEWSClassification": 82.58, + "MassiveIntentClassification": 56.31, + "MassiveScenarioClassification": 59.5 } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "ArxivClusteringP2P": 43.14, - "ArxivClusteringS2S": 42.38, - "BiorxivClusteringP2P": 35.88, - "BiorxivClusteringS2S": 34.81, - "MedrxivClusteringP2P": 32.23, - "MedrxivClusteringS2S": 31.37, - "RedditClustering": 61.1, - "RedditClusteringP2P": 64.52, - "StackExchangeClustering": 67.98, - "StackExchangeClusteringP2P": 33.2, - "TwentyNewsgroupsClustering": 51.04 + "Model": "Cohere-embed-multilingual-light-v3.0", + "AlloProfClusteringP2P": 61.96, + "AlloProfClusteringS2S": 31.36, + "HALClusteringS2S": 17.31, + "MLSUMClusteringP2P": 42.8, + "MLSUMClusteringS2S": 32.72, + "MasakhaNEWSClusteringP2P": 56.81, + "MasakhaNEWSClusteringS2S": 29.41 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "SprintDuplicateQuestions": 96.83, - "TwitterSemEval2015": 80.7, - "TwitterURLCorpus": 86.56 + "Model": "Cohere-embed-multilingual-light-v3.0", + "OpusparcusPC": 90.92, + "PawsXPairClassification": 57.32 }, { - "Model": "LLM2Vec-Llama-2-supervised", - "SprintDuplicateQuestions": 96.83, - "TwitterSemEval2015": 80.7, - "TwitterURLCorpus": 86.56 + "Model": "Cohere-embed-multilingual-light-v3.0", + "OpusparcusPC": 90.92, + "PawsXPairClassification": 57.35 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "AskUbuntuDupQuestions": 63.13, - "MindSmallReranking": 31.34, - "SciDocsRR": 84.03, - "StackOverflowDupQuestions": 51.02 + "Model": "Cohere-embed-multilingual-light-v3.0", + "AlloprofReranking": 51.6, + "SyntecReranking": 88.03 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "ArguAna": 56.53, - "CQADupstackRetrieval": 45.94, - "ClimateFEVER": 30.7, - "DBPedia": 48.42, - "FEVER": 89.93, - "FiQA2018": 51.28, - "HotpotQA": 72.99, - "MSMARCO": 41.46, - "NFCorpus": 40.33, - "NQ": 61.24, - "QuoraRetrieval": 85.59, - "SCIDOCS": 21.05, - "SciFact": 77.3, - "TRECCOVID": 79.25, - "Touche2020": 16.92 + "Model": "Cohere-embed-multilingual-light-v3.0", + "AlloprofRetrieval": 35.39, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 23.0, + "SyntecRetrieval": 76.88, + "XPQARetrieval": 45.23 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "BIOSSES": 82.13, - "SICK-R": 83.01, - "STS12": 78.85, - "STS13": 86.84, - "STS14": 84.04, - "STS15": 88.72, - "STS16": 86.79, - "STS17 (en-en)": 90.63, - "STS22 (en)": 67.55, - "STSBenchmark": 88.72 + "Model": "Cohere-embed-multilingual-light-v3.0", + "SICKFr": 75.5, + "STS22": 82.8, + "STSBenchmarkMultilingualSTS": 76.48 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "LLM2Vec-Llama-2-supervised", - "SummEval": 28.49 + "Model": "Cohere-embed-multilingual-light-v3.0", + "SummEvalFr": 31.4 } ] }, @@ -582,118 +328,87 @@ "p-MRR": [] } }, - "LLM2Vec-Sheared-Llama-supervised": { + "Cohere-embed-multilingual-v3.0": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "AmazonCounterfactualClassification (en)": 77.42, - "AmazonPolarityClassification": 82.05, - "AmazonReviewsClassification (en)": 40.81, - "Banking77Classification": 86.01, - "EmotionClassification": 48.38, - "ImdbClassification": 75.33, - "MTOPDomainClassification (en)": 94.09, - "MTOPIntentClassification (en)": 77.05, - "MassiveIntentClassification (en)": 75.58, - "MassiveScenarioClassification (en)": 79.16, - "ToxicConversationsClassification": 69.92, - "TweetSentimentExtractionClassification": 60.76 + "Model": "Cohere-embed-multilingual-v3.0", + "AmazonReviewsClassification": 41.89, + "MTOPDomainClassification": 86.23, + "MTOPIntentClassification": 61.07, + "MasakhaNEWSClassification": 83.06, + "MassiveIntentClassification": 62.94, + "MassiveScenarioClassification": 67.29 } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "ArxivClusteringP2P": 43.47, - "ArxivClusteringS2S": 39.85, - "BiorxivClusteringP2P": 37.1, - "BiorxivClusteringS2S": 34.28, - "MedrxivClusteringP2P": 33.55, - "MedrxivClusteringS2S": 31.11, - "RedditClustering": 53.02, - "RedditClusteringP2P": 60.47, - "StackExchangeClustering": 63.04, - "StackExchangeClusteringP2P": 34.01, - "TwentyNewsgroupsClustering": 49.37 + "Model": "Cohere-embed-multilingual-v3.0", + "AlloProfClusteringP2P": 63.53, + "AlloProfClusteringS2S": 36.18, + "HALClusteringS2S": 19.9, + "MLSUMClusteringP2P": 45.08, + "MLSUMClusteringS2S": 34.75, + "MasakhaNEWSClusteringP2P": 53.18, + "MasakhaNEWSClusteringS2S": 32.31 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "SprintDuplicateQuestions": 96.25, - "TwitterSemEval2015": 76.14, - "TwitterURLCorpus": 86.23 + "Model": "Cohere-embed-multilingual-v3.0", + "OpusparcusPC": 94.08, + "PawsXPairClassification": 61.26 }, { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "SprintDuplicateQuestions": 96.25, - "TwitterSemEval2015": 76.14, - "TwitterURLCorpus": 86.23 + "Model": "Cohere-embed-multilingual-v3.0", + "OpusparcusPC": 94.08, + "PawsXPairClassification": 61.26 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "AskUbuntuDupQuestions": 60.71, - "MindSmallReranking": 31.96, - "SciDocsRR": 79.23, - "StackOverflowDupQuestions": 49.61 + "Model": "Cohere-embed-multilingual-v3.0", + "AlloprofReranking": 51.01, + "SyntecReranking": 85.72 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "ArguAna": 51.66, - "CQADupstackRetrieval": 41.73, - "ClimateFEVER": 33.49, - "DBPedia": 43.58, - "FEVER": 86.81, - "FiQA2018": 41.0, - "HotpotQA": 63.85, - "MSMARCO": 38.32, - "NFCorpus": 37.12, - "NQ": 53.89, - "QuoraRetrieval": 87.37, - "SCIDOCS": 17.96, - "SciFact": 72.08, - "TRECCOVID": 80.41, - "Touche2020": 22.31 + "Model": "Cohere-embed-multilingual-v3.0", + "AlloprofRetrieval": 38.36, + "BSARDRetrieval": 0.14, + "MintakaRetrieval": 25.44, + "SyntecRetrieval": 79.27, + "XPQARetrieval": 58.87 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "BIOSSES": 85.88, - "SICK-R": 82.25, - "STS12": 78.28, - "STS13": 85.52, - "STS14": 82.49, - "STS15": 88.76, - "STS16": 87.11, - "STS17 (en-en)": 90.1, - "STS22 (en)": 68.25, - "STSBenchmark": 87.16 + "Model": "Cohere-embed-multilingual-v3.0", + "SICKFr": 79.23, + "STS22": 82.76, + "STSBenchmarkMultilingualSTS": 81.84 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "LLM2Vec-Sheared-Llama-supervised", - "SummEval": 30.01 + "Model": "Cohere-embed-multilingual-v3.0", + "SummEvalFr": 31.26 } ] }, @@ -704,89 +419,50 @@ "p-MRR": [] } }, - "Cohere-embed-multilingual-light-v3.0": { + "DanskBERT": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "DanskBERT", + "BornholmBitextMining": 6.34 + } + ] }, "Classification": { "accuracy": [ { - "Model": "Cohere-embed-multilingual-light-v3.0", - "AmazonReviewsClassification (fr)": 38.6, - "MTOPDomainClassification (fr)": 80.79, - "MTOPIntentClassification (fr)": 50.01, - "MasakhaNEWSClassification (fra)": 82.58, - "MassiveIntentClassification (fr)": 56.31, - "MassiveScenarioClassification (fr)": 59.5 + "Model": "DanskBERT", + "AngryTweetsClassification": 54.28, + "DKHateClassification": 59.3, + "DanishPoliticalCommentsClassification": 39.81, + "LccSentimentClassification": 58.0, + "MassiveIntentClassification": 40.82, + "MassiveScenarioClassification": 40.14, + "NoRecClassification": 46.06, + "NordicLangClassification": 74.25, + "NorwegianParliament": 56.79, + "ScalaDaClassification": 66.59, + "ScalaNbClassification": 59.99 } ] }, "Clustering": { - "v_measure": [ - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "AlloProfClusteringP2P": 61.96, - "AlloProfClusteringS2S": 31.36, - "HALClusteringS2S": 17.31, - "MLSUMClusteringP2P": 42.8, - "MLSUMClusteringS2S": 32.72, - "MasakhaNEWSClusteringP2P (fra)": 56.81, - "MasakhaNEWSClusteringS2S (fra)": 29.41 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "OpusparcusPC (fr)": 90.92, - "PawsXPairClassification (fr)": 57.32 - }, - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "OpusparcusPC (fr)": 90.92, - "PawsXPairClassification (fr)": 57.35 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "AlloprofReranking": 51.6, - "SyntecReranking": 88.03 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "AlloprofRetrieval": 35.39, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 23.0, - "SyntecRetrieval": 76.88, - "XPQARetrieval (fr)": 45.23 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "SICKFr": 75.5, - "STS22 (fr)": 82.8, - "STSBenchmarkMultilingualSTS (fr)": 76.48 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "Cohere-embed-multilingual-light-v3.0", - "SummEvalFr": 31.4 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -795,289 +471,721 @@ "p-MRR": [] } }, - "bert-base-multilingual-uncased": { + "FollowIR-7B": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "bert-base-multilingual-uncased", - "AmazonReviewsClassification (fr)": 29.02, - "MTOPDomainClassification (fr)": 64.49, - "MTOPIntentClassification (fr)": 39.4, - "MasakhaNEWSClassification (fra)": 75.69, - "MassiveIntentClassification (fr)": 38.01, - "MassiveScenarioClassification (fr)": 43.63 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "bert-base-multilingual-uncased", - "AlloProfClusteringP2P": 60.66, - "AlloProfClusteringS2S": 35.05, - "HALClusteringS2S": 20.9, - "MLSUMClusteringP2P": 43.5, - "MLSUMClusteringS2S": 30.99, - "MasakhaNEWSClusteringP2P (fra)": 49.71, - "MasakhaNEWSClusteringS2S (fra)": 42.23 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "bert-base-multilingual-uncased", - "OpusparcusPC (fr)": 87.43, - "PawsXPairClassification (fr)": 53.22 - }, - { - "Model": "bert-base-multilingual-uncased", - "OpusparcusPC (fr)": 87.53, - "PawsXPairClassification (fr)": 53.33 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "bert-base-multilingual-uncased", - "AlloprofReranking": 38.85, - "SyntecReranking": 66.4 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "bert-base-multilingual-uncased", - "AlloprofRetrieval": 5.51, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 2.87, - "SyntecRetrieval": 34.95, - "XPQARetrieval (fr)": 26.12 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "bert-base-multilingual-uncased", - "SICKFr": 58.26, - "STS22 (fr)": 56.47, - "STSBenchmarkMultilingualSTS (fr)": 54.97 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "bert-base-multilingual-uncased", - "SummEvalFr": 30.72 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "FollowIR-7B", + "Core17InstructionRetrieval": 16.48, + "News21InstructionRetrieval": 6.26, + "Robust04InstructionRetrieval": 13.72 + } + ] } }, - "bge-m3": { + "GritLM-7B": { "BitextMining": { "f1": [ { - "Model": "bge-m3", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.42 + "Model": "GritLM-7B", + "BornholmBitextMining": 61.17, + "Tatoeba (hye-eng)": 54.7, + "Tatoeba (afr-eng)": 91.82, + "Tatoeba (vie-eng)": 96.18, + "Tatoeba (cym-eng)": 75.08, + "Tatoeba (xho-eng)": 42.31, + "Tatoeba (uzb-eng)": 59.94, + "Tatoeba (gsw-eng)": 59.66, + "Tatoeba (mal-eng)": 52.3, + "Tatoeba (ben-eng)": 78.41, + "Tatoeba (spa-eng)": 98.62, + "Tatoeba (arq-eng)": 43.42, + "Tatoeba (hin-eng)": 93.86, + "Tatoeba (yue-eng)": 88.43, + "Tatoeba (nld-eng)": 96.35, + "Tatoeba (hun-eng)": 94.2, + "Tatoeba (tha-eng)": 92.55, + "Tatoeba (ber-eng)": 7.88, + "Tatoeba (cor-eng)": 10.97, + "Tatoeba (kab-eng)": 3.21, + "Tatoeba (mon-eng)": 38.62, + "Tatoeba (pes-eng)": 90.25, + "Tatoeba (ind-eng)": 93.53, + "Tatoeba (fry-eng)": 73.7, + "Tatoeba (dan-eng)": 95.6, + "Tatoeba (ron-eng)": 94.53, + "Tatoeba (dsb-eng)": 69.41, + "Tatoeba (nds-eng)": 82.66, + "Tatoeba (jav-eng)": 40.36, + "Tatoeba (ast-eng)": 86.35, + "Tatoeba (lfn-eng)": 79.93, + "Tatoeba (mkd-eng)": 88.68, + "Tatoeba (kzj-eng)": 12.2, + "Tatoeba (ido-eng)": 82.17, + "Tatoeba (eus-eng)": 44.09, + "Tatoeba (ita-eng)": 94.54, + "Tatoeba (orv-eng)": 63.76, + "Tatoeba (zsm-eng)": 93.39, + "Tatoeba (mar-eng)": 68.94, + "Tatoeba (slv-eng)": 91.48, + "Tatoeba (hsb-eng)": 79.62, + "Tatoeba (ile-eng)": 91.1, + "Tatoeba (ara-eng)": 89.5, + "Tatoeba (yid-eng)": 31.64, + "Tatoeba (swh-eng)": 64.97, + "Tatoeba (khm-eng)": 32.74, + "Tatoeba (arz-eng)": 68.47, + "Tatoeba (amh-eng)": 11.66, + "Tatoeba (max-eng)": 66.33, + "Tatoeba (pms-eng)": 70.24, + "Tatoeba (kat-eng)": 58.44, + "Tatoeba (ang-eng)": 83.83, + "Tatoeba (fra-eng)": 94.5, + "Tatoeba (ukr-eng)": 94.35, + "Tatoeba (wuu-eng)": 90.31, + "Tatoeba (pol-eng)": 97.35, + "Tatoeba (tzl-eng)": 62.95, + "Tatoeba (awa-eng)": 70.28, + "Tatoeba (isl-eng)": 88.47, + "Tatoeba (hrv-eng)": 95.43, + "Tatoeba (bre-eng)": 19.19, + "Tatoeba (cmn-eng)": 95.48, + "Tatoeba (gla-eng)": 65.28, + "Tatoeba (ces-eng)": 96.03, + "Tatoeba (est-eng)": 69.22, + "Tatoeba (aze-eng)": 82.05, + "Tatoeba (nov-eng)": 78.53, + "Tatoeba (ina-eng)": 96.35, + "Tatoeba (cha-eng)": 47.58, + "Tatoeba (kaz-eng)": 47.61, + "Tatoeba (fin-eng)": 93.35, + "Tatoeba (deu-eng)": 99.47, + "Tatoeba (kor-eng)": 91.4, + "Tatoeba (tam-eng)": 66.66, + "Tatoeba (mhr-eng)": 13.31, + "Tatoeba (tur-eng)": 95.13, + "Tatoeba (sqi-eng)": 73.59, + "Tatoeba (por-eng)": 94.45, + "Tatoeba (epo-eng)": 90.0, + "Tatoeba (jpn-eng)": 95.58, + "Tatoeba (tgl-eng)": 94.29, + "Tatoeba (swg-eng)": 73.87, + "Tatoeba (ceb-eng)": 45.91, + "Tatoeba (cat-eng)": 94.15, + "Tatoeba (nob-eng)": 98.0, + "Tatoeba (gle-eng)": 71.65, + "Tatoeba (nno-eng)": 90.49, + "Tatoeba (war-eng)": 41.74, + "Tatoeba (slk-eng)": 93.92, + "Tatoeba (urd-eng)": 83.82, + "Tatoeba (bos-eng)": 95.9, + "Tatoeba (bel-eng)": 90.07, + "Tatoeba (heb-eng)": 82.14, + "Tatoeba (glg-eng)": 94.18, + "Tatoeba (srp-eng)": 94.23, + "Tatoeba (oci-eng)": 71.66, + "Tatoeba (ell-eng)": 91.56, + "Tatoeba (kur-eng)": 35.06, + "Tatoeba (fao-eng)": 83.02, + "Tatoeba (tat-eng)": 39.47, + "Tatoeba (lvs-eng)": 72.89, + "Tatoeba (csb-eng)": 69.78, + "Tatoeba (dtp-eng)": 10.77, + "Tatoeba (pam-eng)": 15.22, + "Tatoeba (tel-eng)": 37.93, + "Tatoeba (bul-eng)": 93.5, + "Tatoeba (tuk-eng)": 48.38, + "Tatoeba (cbk-eng)": 83.83, + "Tatoeba (lit-eng)": 77.91, + "Tatoeba (swe-eng)": 94.95, + "Tatoeba (uig-eng)": 40.4, + "Tatoeba (lat-eng)": 91.56, + "Tatoeba (rus-eng)": 93.93 } ] }, "Classification": { "accuracy": [ { - "Model": "bge-m3", - "GeoreviewClassification (rus-Cyrl)": 48.27, - "HeadlineClassification (rus-Cyrl)": 70.32, - "InappropriatenessClassification (rus-Cyrl)": 59.87, - "KinopoiskClassification (rus-Cyrl)": 58.23, - "MassiveIntentClassification (rus-Cyrl)": 68.75, - "MassiveScenarioClassification (rus-Cyrl)": 73.42, - "RuReviewsClassification (rus-Cyrl)": 66.91, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 55.81, - "RuSciBenchOECDClassification (rus-Cyrl)": 42.57 + "Model": "GritLM-7B", + "AllegroReviews": 56.77, + "AmazonCounterfactualClassification (en-ext)": 79.61, + "AmazonCounterfactualClassification (en)": 80.54, + "AmazonCounterfactualClassification (de)": 74.94, + "AmazonCounterfactualClassification (ja)": 81.6, + "AmazonPolarityClassification": 96.64, + "AmazonReviewsClassification (en)": 59.91, + "AmazonReviewsClassification (de)": 58.21, + "AmazonReviewsClassification (es)": 55.74, + "AmazonReviewsClassification (fr)": 54.43, + "AmazonReviewsClassification (ja)": 54.51, + "AmazonReviewsClassification (zh)": 51.01, + "AngryTweetsClassification": 65.35, + "Banking77Classification": 84.44, + "CBD": 77.23, + "DanishPoliticalCommentsClassification": 41.28, + "EmotionClassification": 56.01, + "GeoreviewClassification": 53.47, + "HeadlineClassification": 85.66, + "IFlyTek": 52.7, + "ImdbClassification": 95.07, + "InappropriatenessClassification": 65.29, + "JDReview": 87.65, + "KinopoiskClassification": 64.25, + "LccSentimentClassification": 70.13, + "MTOPDomainClassification (en)": 95.37, + "MTOPDomainClassification (de)": 93.51, + "MTOPDomainClassification (es)": 92.78, + "MTOPDomainClassification (fr)": 92.25, + "MTOPDomainClassification (hi)": 85.26, + "MTOPDomainClassification (th)": 83.35, + "MTOPIntentClassification (en)": 81.23, + "MTOPIntentClassification (de)": 79.28, + "MTOPIntentClassification (es)": 81.2, + "MTOPIntentClassification (fr)": 76.87, + "MTOPIntentClassification (hi)": 63.66, + "MTOPIntentClassification (th)": 69.59, + "MasakhaNEWSClassification (amh)": 53.83, + "MasakhaNEWSClassification (eng)": 80.47, + "MasakhaNEWSClassification (fra)": 81.35, + "MasakhaNEWSClassification (hau)": 75.4, + "MasakhaNEWSClassification (ibo)": 74.18, + "MasakhaNEWSClassification (lin)": 77.43, + "MasakhaNEWSClassification (lug)": 72.38, + "MasakhaNEWSClassification (orm)": 79.02, + "MasakhaNEWSClassification (pcm)": 94.79, + "MasakhaNEWSClassification (run)": 80.81, + "MasakhaNEWSClassification (sna)": 87.29, + "MasakhaNEWSClassification (som)": 65.82, + "MasakhaNEWSClassification (swa)": 75.06, + "MasakhaNEWSClassification (tir)": 35.07, + "MasakhaNEWSClassification (xho)": 84.01, + "MasakhaNEWSClassification (yor)": 83.82, + "MassiveIntentClassification (kn)": 48.65, + "MassiveIntentClassification (is)": 59.76, + "MassiveIntentClassification (sw)": 54.04, + "MassiveIntentClassification (am)": 36.93, + "MassiveIntentClassification (fr)": 75.51, + "MassiveIntentClassification (hy)": 47.26, + "MassiveIntentClassification (es)": 75.14, + "MassiveIntentClassification (id)": 72.84, + "MassiveIntentClassification (it)": 76.46, + "MassiveIntentClassification (zh-CN)": 75.69, + "MassiveIntentClassification (zh-TW)": 70.86, + "MassiveIntentClassification (ml)": 46.31, + "MassiveIntentClassification (ms)": 70.72, + "MassiveIntentClassification (jv)": 55.89, + "MassiveIntentClassification (te)": 46.92, + "MassiveIntentClassification (ur)": 59.32, + "MassiveIntentClassification (vi)": 69.02, + "MassiveIntentClassification (mn)": 44.24, + "MassiveIntentClassification (lv)": 57.35, + "MassiveIntentClassification (th)": 63.08, + "MassiveIntentClassification (hu)": 70.42, + "MassiveIntentClassification (af)": 67.3, + "MassiveIntentClassification (pt)": 76.08, + "MassiveIntentClassification (sq)": 56.51, + "MassiveIntentClassification (he)": 63.87, + "MassiveIntentClassification (ru)": 76.01, + "MassiveIntentClassification (da)": 73.46, + "MassiveIntentClassification (cy)": 53.51, + "MassiveIntentClassification (ja)": 75.19, + "MassiveIntentClassification (en)": 79.1, + "MassiveIntentClassification (fi)": 67.91, + "MassiveIntentClassification (az)": 62.1, + "MassiveIntentClassification (ar)": 60.26, + "MassiveIntentClassification (hi)": 66.02, + "MassiveIntentClassification (sv)": 75.29, + "MassiveIntentClassification (el)": 67.04, + "MassiveIntentClassification (km)": 42.63, + "MassiveIntentClassification (ko)": 71.38, + "MassiveIntentClassification (ro)": 70.1, + "MassiveIntentClassification (ta)": 48.79, + "MassiveIntentClassification (nl)": 75.5, + "MassiveIntentClassification (ka)": 46.6, + "MassiveIntentClassification (nb)": 70.92, + "MassiveIntentClassification (tr)": 70.73, + "MassiveIntentClassification (pl)": 75.36, + "MassiveIntentClassification (bn)": 57.82, + "MassiveIntentClassification (sl)": 69.98, + "MassiveIntentClassification (tl)": 67.86, + "MassiveIntentClassification (my)": 40.8, + "MassiveIntentClassification (de)": 74.8, + "MassiveIntentClassification (fa)": 70.45, + "MassiveScenarioClassification (nb)": 75.62, + "MassiveScenarioClassification (sq)": 62.49, + "MassiveScenarioClassification (sl)": 74.71, + "MassiveScenarioClassification (de)": 78.58, + "MassiveScenarioClassification (fi)": 70.33, + "MassiveScenarioClassification (bn)": 61.88, + "MassiveScenarioClassification (he)": 67.52, + "MassiveScenarioClassification (el)": 70.08, + "MassiveScenarioClassification (vi)": 72.94, + "MassiveScenarioClassification (hu)": 73.71, + "MassiveScenarioClassification (ml)": 51.24, + "MassiveScenarioClassification (ms)": 73.8, + "MassiveScenarioClassification (hi)": 69.18, + "MassiveScenarioClassification (is)": 66.04, + "MassiveScenarioClassification (te)": 52.39, + "MassiveScenarioClassification (sw)": 62.19, + "MassiveScenarioClassification (tl)": 71.83, + "MassiveScenarioClassification (sv)": 78.52, + "MassiveScenarioClassification (en)": 80.37, + "MassiveScenarioClassification (ja)": 77.49, + "MassiveScenarioClassification (az)": 65.13, + "MassiveScenarioClassification (fr)": 77.29, + "MassiveScenarioClassification (it)": 77.88, + "MassiveScenarioClassification (ko)": 75.95, + "MassiveScenarioClassification (zh-CN)": 78.47, + "MassiveScenarioClassification (id)": 75.94, + "MassiveScenarioClassification (kn)": 55.06, + "MassiveScenarioClassification (cy)": 60.36, + "MassiveScenarioClassification (am)": 44.74, + "MassiveScenarioClassification (hy)": 53.06, + "MassiveScenarioClassification (km)": 50.11, + "MassiveScenarioClassification (pt)": 76.23, + "MassiveScenarioClassification (zh-TW)": 75.14, + "MassiveScenarioClassification (ru)": 78.28, + "MassiveScenarioClassification (ta)": 54.59, + "MassiveScenarioClassification (es)": 77.05, + "MassiveScenarioClassification (mn)": 51.04, + "MassiveScenarioClassification (th)": 67.74, + "MassiveScenarioClassification (ur)": 65.78, + "MassiveScenarioClassification (my)": 46.9, + "MassiveScenarioClassification (fa)": 73.52, + "MassiveScenarioClassification (af)": 72.84, + "MassiveScenarioClassification (nl)": 77.79, + "MassiveScenarioClassification (tr)": 71.9, + "MassiveScenarioClassification (ar)": 67.23, + "MassiveScenarioClassification (pl)": 76.25, + "MassiveScenarioClassification (ro)": 72.54, + "MassiveScenarioClassification (jv)": 62.33, + "MassiveScenarioClassification (da)": 76.98, + "MassiveScenarioClassification (lv)": 61.54, + "MassiveScenarioClassification (ka)": 54.44, + "MultilingualSentiment": 75.51, + "NoRecClassification": 56.05, + "NordicLangClassification": 69.36, + "OnlineShopping": 93.09, + "PAC": 67.05, + "PolEmo2.0-IN": 83.5, + "PolEmo2.0-OUT": 62.55, + "RuReviewsClassification": 68.58, + "RuSciBenchGRNTIClassification": 64.56, + "RuSciBenchOECDClassification": 51.2, + "TNews": 51.99, + "ToxicConversationsClassification": 68.81, + "TweetSentimentExtractionClassification": 66.26, + "Waimai": 87.91 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-m3", - "GeoreviewClusteringP2P (rus-Cyrl)": 63.09, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 50.83, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 43.21 + "Model": "GritLM-7B", + "GeoreviewClusteringP2P": 74.06, + "MasakhaNEWSClusteringP2P (amh)": 45.1, + "MasakhaNEWSClusteringP2P (eng)": 70.5, + "MasakhaNEWSClusteringP2P (fra)": 73.54, + "MasakhaNEWSClusteringP2P (hau)": 59.75, + "MasakhaNEWSClusteringP2P (ibo)": 66.75, + "MasakhaNEWSClusteringP2P (lin)": 59.57, + "MasakhaNEWSClusteringP2P (lug)": 58.93, + "MasakhaNEWSClusteringP2P (orm)": 54.38, + "MasakhaNEWSClusteringP2P (pcm)": 92.67, + "MasakhaNEWSClusteringP2P (run)": 59.74, + "MasakhaNEWSClusteringP2P (sna)": 68.86, + "MasakhaNEWSClusteringP2P (som)": 42.54, + "MasakhaNEWSClusteringP2P (swa)": 33.61, + "MasakhaNEWSClusteringP2P (tir)": 51.66, + "MasakhaNEWSClusteringP2P (xho)": 46.65, + "MasakhaNEWSClusteringP2P (yor)": 52.39, + "MasakhaNEWSClusteringS2S (amh)": 43.39, + "MasakhaNEWSClusteringS2S (eng)": 65.85, + "MasakhaNEWSClusteringS2S (fra)": 68.87, + "MasakhaNEWSClusteringS2S (hau)": 33.02, + "MasakhaNEWSClusteringS2S (ibo)": 64.55, + "MasakhaNEWSClusteringS2S (lin)": 72.01, + "MasakhaNEWSClusteringS2S (lug)": 47.42, + "MasakhaNEWSClusteringS2S (orm)": 32.59, + "MasakhaNEWSClusteringS2S (pcm)": 97.82, + "MasakhaNEWSClusteringS2S (run)": 59.41, + "MasakhaNEWSClusteringS2S (sna)": 71.58, + "MasakhaNEWSClusteringS2S (som)": 40.91, + "MasakhaNEWSClusteringS2S (swa)": 33.54, + "MasakhaNEWSClusteringS2S (tir)": 45.32, + "MasakhaNEWSClusteringS2S (xho)": 28.94, + "MasakhaNEWSClusteringS2S (yor)": 63.26, + "RuSciBenchGRNTIClusteringP2P": 60.01, + "RuSciBenchOECDClusteringP2P": 51.66 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bge-m3", - "OpusparcusPC (rus-Cyrl)": 89.64, - "TERRa (rus-Cyrl)": 60.6 + "Model": "GritLM-7B", + "CDSC-E": 75.61, + "OpusparcusPC (de)": 97.43, + "OpusparcusPC (en)": 99.14, + "OpusparcusPC (fi)": 92.05, + "OpusparcusPC (fr)": 95.14, + "OpusparcusPC (ru)": 91.13, + "OpusparcusPC (sv)": 94.87, + "PSC": 99.42, + "PawsXPairClassification (de)": 57.59, + "PawsXPairClassification (en)": 66.63, + "PawsXPairClassification (es)": 58.9, + "PawsXPairClassification (fr)": 61.48, + "PawsXPairClassification (ja)": 51.11, + "PawsXPairClassification (ko)": 51.9, + "PawsXPairClassification (zh)": 57.86, + "SICK-E-PL": 79.06, + "SprintDuplicateQuestions": 93.0, + "TERRa": 58.86, + "TwitterSemEval2015": 81.08, + "TwitterURLCorpus": 87.4 }, { - "Model": "bge-m3", - "OpusparcusPC (rus-Cyrl)": 89.64, - "TERRa (rus-Cyrl)": 60.6 + "Model": "GritLM-7B", + "CDSC-E": 75.61, + "OpusparcusPC (de)": 97.43, + "OpusparcusPC (en)": 99.14, + "OpusparcusPC (fi)": 92.05, + "OpusparcusPC (fr)": 95.14, + "OpusparcusPC (ru)": 91.13, + "OpusparcusPC (sv)": 94.87, + "PSC": 99.45, + "PawsXPairClassification (de)": 57.66, + "PawsXPairClassification (en)": 66.82, + "PawsXPairClassification (es)": 59.04, + "PawsXPairClassification (fr)": 61.56, + "PawsXPairClassification (ja)": 51.2, + "PawsXPairClassification (ko)": 51.92, + "PawsXPairClassification (zh)": 57.88, + "SICK-E-PL": 79.06, + "SprintDuplicateQuestions": 93.35, + "TERRa": 58.93, + "TwitterSemEval2015": 81.08, + "TwitterURLCorpus": 87.43 } ] }, "Reranking": { "map": [ { - "Model": "bge-m3", - "MIRACLReranking (rus-Cyrl)": 65.38 - }, - { - "Model": "bge-m3", - "RuBQReranking (rus-Cyrl)": 74.03 + "Model": "GritLM-7B", + "AlloprofReranking": 77.93, + "AskUbuntuDupQuestions": 67.37, + "MMarcoReranking": 21.7, + "MindSmallReranking": 31.81, + "RuBQReranking": 72.43, + "SciDocsRR": 86.82, + "StackOverflowDupQuestions": 55.94, + "SyntecReranking": 92.62, + "T2Reranking": 65.64 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-m3", - "ARCChallenge": 9.02, - "AlphaNLI": 24.73, - "HellaSwag": 25.67, - "LEMBNarrativeQARetrieval": 45.76, - "LEMBQMSumRetrieval": 35.54, - "LEMBSummScreenFDRetrieval": 94.09, - "LEMBWikimQARetrieval": 77.73, - "MIRACLRetrieval (rus-Cyrl)": 70.16, - "PIQA": 22.93, - "Quail": 7.51, - "RARbCode": 38.8, - "RARbMath": 69.19, - "RiaNewsRetrieval (rus-Cyrl)": 82.99, - "RuBQRetrieval (rus-Cyrl)": 71.22, - "SIQA": 4.89, - "SpartQA": 7.49, - "TempReasonL1": 0.99, - "TempReasonL2Fact": 33.23, - "TempReasonL2Pure": 0.68, - "TempReasonL3Fact": 30.05, - "TempReasonL3Pure": 5.28, - "WinoGrande": 41.72 - }, + "Model": "GritLM-7B", + "AILACasedocs": 35.29, + "AILAStatutes": 41.8, + "ARCChallenge": 26.68, + "AlloprofRetrieval": 55.42, + "AlphaNLI": 34.0, + "AppsRetrieval": 35.13, + "ArguAna": 63.17, + "ArguAna-PL": 48.96, + "BSARDRetrieval": 26.61, + "BrightRetrieval (pony)": 21.98, + "BrightRetrieval (robotics)": 17.31, + "BrightRetrieval (economics)": 19.0, + "BrightRetrieval (theoremqa_questions)": 23.34, + "BrightRetrieval (leetcode)": 29.85, + "BrightRetrieval (earth_science)": 32.77, + "BrightRetrieval (stackoverflow)": 11.62, + "BrightRetrieval (sustainable_living)": 18.04, + "BrightRetrieval (biology)": 25.04, + "BrightRetrieval (psychology)": 19.92, + "BrightRetrieval (theoremqa_theorems)": 19.75, + "BrightRetrieval (aops)": 8.91, + "CmedqaRetrieval": 35.58, + "CodeFeedbackMT": 50.64, + "CodeFeedbackST": 84.08, + "CodeSearchNetCCRetrieval (python)": 90.92, + "CodeSearchNetCCRetrieval (javascript)": 87.27, + "CodeSearchNetCCRetrieval (go)": 85.44, + "CodeSearchNetCCRetrieval (ruby)": 89.63, + "CodeSearchNetCCRetrieval (java)": 87.63, + "CodeSearchNetCCRetrieval (php)": 79.25, + "CodeSearchNetRetrieval (python)": 91.65, + "CodeSearchNetRetrieval (javascript)": 81.43, + "CodeSearchNetRetrieval (go)": 90.01, + "CodeSearchNetRetrieval (ruby)": 85.42, + "CodeSearchNetRetrieval (java)": 89.84, + "CodeSearchNetRetrieval (php)": 81.57, + "CodeTransOceanContest": 89.22, + "CodeTransOceanDL": 32.98, + "CosQA": 31.24, + "CovidRetrieval": 73.4, + "DuRetrieval": 88.18, + "EcomRetrieval": 54.33, + "FiQA-PL": 37.98, + "FiQA2018": 59.91, + "GerDaLIRSmall": 20.61, + "HellaSwag": 39.45, + "LEMBNarrativeQARetrieval": 41.45, + "LEMBQMSumRetrieval": 30.36, + "LEMBSummScreenFDRetrieval": 78.48, + "LEMBWikimQARetrieval": 60.77, + "LeCaRDv2": 64.22, + "LegalBenchConsumerContractsQA": 82.05, + "LegalBenchCorporateLobbying": 95.0, + "LegalQuAD": 44.18, + "LegalSummarization": 70.64, + "MMarcoRetrieval": 76.54, + "MedicalRetrieval": 55.81, + "MintakaRetrieval (ar)": 25.88, + "MintakaRetrieval (de)": 55.66, + "MintakaRetrieval (es)": 53.36, + "MintakaRetrieval (fr)": 51.68, + "MintakaRetrieval (hi)": 26.06, + "MintakaRetrieval (it)": 54.91, + "MintakaRetrieval (ja)": 34.11, + "MintakaRetrieval (pt)": 54.89, + "NFCorpus": 40.86, + "NFCorpus-PL": 32.85, + "PIQA": 44.35, + "Quail": 11.67, + "RARbCode": 84.02, + "RARbMath": 82.35, + "RuBQRetrieval": 70.94, + "SCIDOCS": 24.41, + "SCIDOCS-PL": 18.34, + "SIQA": 7.23, + "SciFact": 79.13, + "SciFact-PL": 73.22, + "SpartQA": 9.35, + "StackOverflowQA": 93.37, + "SyntecRetrieval": 89.48, + "SyntheticText2SQL": 60.39, + "T2Retrieval": 82.96, + "TRECCOVID": 74.31, + "TRECCOVID-PL": 58.15, + "TempReasonL1": 7.16, + "TempReasonL2Fact": 58.39, + "TempReasonL2Pure": 11.22, + "TempReasonL3Fact": 44.29, + "TempReasonL3Pure": 14.15, + "Touche2020": 27.78, + "VideoRetrieval": 53.85, + "WinoGrande": 53.7, + "XPQARetrieval (ara-ara)": 45.21, + "XPQARetrieval (eng-ara)": 27.34, + "XPQARetrieval (ara-eng)": 39.43, + "XPQARetrieval (deu-deu)": 76.58, + "XPQARetrieval (eng-deu)": 55.44, + "XPQARetrieval (deu-eng)": 72.56, + "XPQARetrieval (spa-spa)": 64.54, + "XPQARetrieval (eng-spa)": 45.5, + "XPQARetrieval (spa-eng)": 61.03, + "XPQARetrieval (fra-fra)": 70.84, + "XPQARetrieval (eng-fra)": 48.14, + "XPQARetrieval (fra-eng)": 66.96, + "XPQARetrieval (hin-hin)": 74.75, + "XPQARetrieval (eng-hin)": 25.62, + "XPQARetrieval (hin-eng)": 63.9, + "XPQARetrieval (ita-ita)": 76.53, + "XPQARetrieval (eng-ita)": 46.82, + "XPQARetrieval (ita-eng)": 71.03, + "XPQARetrieval (jpn-jpn)": 72.28, + "XPQARetrieval (eng-jpn)": 41.9, + "XPQARetrieval (jpn-eng)": 69.42, + "XPQARetrieval (kor-kor)": 40.64, + "XPQARetrieval (eng-kor)": 32.68, + "XPQARetrieval (kor-eng)": 36.0, + "XPQARetrieval (pol-pol)": 50.75, + "XPQARetrieval (eng-pol)": 33.14, + "XPQARetrieval (pol-eng)": 48.06, + "XPQARetrieval (por-por)": 49.86, + "XPQARetrieval (eng-por)": 33.06, + "XPQARetrieval (por-eng)": 48.45, + "XPQARetrieval (tam-tam)": 41.78, + "XPQARetrieval (eng-tam)": 10.95, + "XPQARetrieval (tam-eng)": 21.26, + "XPQARetrieval (cmn-cmn)": 65.29, + "XPQARetrieval (eng-cmn)": 35.86, + "XPQARetrieval (cmn-eng)": 58.12 + } + ], + "recall_at_1": [ { - "Model": "bge-m3", - "LEMBNeedleRetrieval": 40.25, - "LEMBPasskeyRetrieval": 46.0 + "Model": "GritLM-7B", + "BrightRetrieval (biology)": 37.46, + "BrightRetrieval (robotics)": 17.82, + "BrightRetrieval (pony)": 0.0, + "BrightRetrieval (sustainable_living)": 32.36, + "BrightRetrieval (psychology)": 35.35, + "BrightRetrieval (stackoverflow)": 20.08, + "BrightRetrieval (earth_science)": 39.44, + "BrightRetrieval (economics)": 25.73 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bge-m3", - "RUParaPhraserSTS (rus-Cyrl)": 74.9, - "RuSTSBenchmarkSTS (rus-Cyrl)": 79.87, - "STS22 (rus-Cyrl)": 66.26, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 79.27 - } - ] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [ - { - "Model": "bge-m3", - "CEDRClassification (rus-Cyrl)": 43.47, - "SensitiveTopicsClassification (rus-Cyrl)": 26.25 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "e5-small": { - "BitextMining": { - "f1": [ + "Model": "GritLM-7B", + "AFQMC": 35.59, + "ATEC": 40.89, + "BIOSSES": 86.32, + "BQ": 49.18, + "CDSC-R": 93.38, + "LCQMC": 75.52, + "PAWSX": 16.4, + "RUParaPhraserSTS": 74.37, + "RuSTSBenchmarkSTS": 81.07, + "SICK-R": 83.13, + "SICK-R-PL": 76.45, + "SICKFr": 80.25, + "STS12": 77.34, + "STS13": 85.04, + "STS14": 82.91, + "STS15": 88.13, + "STS16": 86.24, + "STS17 (it-en)": 88.43, + "STS17 (nl-en)": 88.29, + "STS17 (en-tr)": 77.48, + "STS17 (fr-en)": 87.9, + "STS17 (en-de)": 88.93, + "STS17 (ar-ar)": 79.26, + "STS17 (ko-ko)": 78.74, + "STS17 (en-ar)": 74.46, + "STS17 (en-en)": 90.14, + "STS17 (es-en)": 87.47, + "STS17 (es-es)": 87.12, + "STSB": 78.12, + "STSBenchmark": 85.64, + "STSBenchmarkMultilingualSTS (en)": 85.65, + "STSBenchmarkMultilingualSTS (fr)": 81.96, + "STSBenchmarkMultilingualSTS (pl)": 80.21, + "STSBenchmarkMultilingualSTS (ru)": 80.84, + "STSBenchmarkMultilingualSTS (nl)": 80.37, + "STSBenchmarkMultilingualSTS (es)": 82.81, + "STSBenchmarkMultilingualSTS (pt)": 80.98, + "STSBenchmarkMultilingualSTS (de)": 82.08, + "STSBenchmarkMultilingualSTS (it)": 81.69, + "STSBenchmarkMultilingualSTS (zh)": 79.73 + }, { - "Model": "e5-small", - "BornholmBitextMining": 40.27 + "Model": "GritLM-7B", + "AFQMC": 35.59, + "ATEC": 40.89, + "BIOSSES": 86.32, + "BQ": 49.18, + "CDSC-R": 93.38, + "LCQMC": 75.52, + "PAWSX": 16.4, + "RUParaPhraserSTS": 74.37, + "RuSTSBenchmarkSTS": 81.07, + "SICK-R": 83.13, + "SICK-R-PL": 76.45, + "SICKFr": 80.25, + "STS12": 77.34, + "STS13": 85.04, + "STS14": 82.91, + "STS15": 88.13, + "STS16": 86.24, + "STS17 (it-en)": 88.43, + "STS17 (nl-en)": 88.29, + "STS17 (en-tr)": 77.48, + "STS17 (fr-en)": 87.9, + "STS17 (en-de)": 88.93, + "STS17 (ar-ar)": 79.26, + "STS17 (ko-ko)": 78.74, + "STS17 (en-ar)": 74.46, + "STS17 (en-en)": 90.14, + "STS17 (es-en)": 87.47, + "STS17 (es-es)": 87.12, + "STSB": 78.12, + "STSBenchmark": 85.64, + "STSBenchmarkMultilingualSTS (en)": 85.65, + "STSBenchmarkMultilingualSTS (fr)": 81.96, + "STSBenchmarkMultilingualSTS (pl)": 80.21, + "STSBenchmarkMultilingualSTS (ru)": 80.84, + "STSBenchmarkMultilingualSTS (nl)": 80.37, + "STSBenchmarkMultilingualSTS (es)": 82.81, + "STSBenchmarkMultilingualSTS (pt)": 80.98, + "STSBenchmarkMultilingualSTS (de)": 82.08, + "STSBenchmarkMultilingualSTS (it)": 81.69, + "STSBenchmarkMultilingualSTS (zh)": 79.73 } ] }, - "Classification": { - "accuracy": [ + "Summarization": { + "cosine_spearman": [ { - "Model": "e5-small", - "AngryTweetsClassification": 43.6, - "DKHateClassification": 57.57, - "DanishPoliticalCommentsClassification": 28.37, - "LccSentimentClassification": 40.27, - "MassiveIntentClassification (da)": 41.89, - "MassiveIntentClassification (nb)": 40.25, - "MassiveIntentClassification (sv)": 40.07, - "MassiveScenarioClassification (da)": 49.93, - "MassiveScenarioClassification (nb)": 48.58, - "MassiveScenarioClassification (sv)": 47.06, - "NoRecClassification": 41.84, - "NordicLangClassification": 53.47, - "NorwegianParliament": 56.57, - "ScalaDaClassification": 50.15, - "ScalaNbClassification": 50.03 + "Model": "GritLM-7B", + "SummEval": 30.39 + }, + { + "Model": "GritLM-7B", + "SummEval": 30.39 } ] }, - "Clustering": { - "v_measure": [ + "MultilabelClassification": { + "accuracy": [ { - "Model": "e5-small", - "BiorxivClusteringP2P": 36.1, - "BiorxivClusteringS2S": 31.51, - "MedrxivClusteringP2P": 31.31, - "MedrxivClusteringS2S": 28.32, - "RedditClustering": 43.27, - "RedditClusteringP2P": 57.22, - "StackExchangeClustering": 59.6, - "StackExchangeClusteringP2P": 30.82, - "TwentyNewsgroupsClustering": 37.65 + "Model": "GritLM-7B", + "CEDRClassification": 42.68, + "SensitiveTopicsClassification": 28.52 } ] }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "GritLM-7B", + "Core17InstructionRetrieval": 6.7, + "News21InstructionRetrieval": 1.22, + "Robust04InstructionRetrieval": 2.44 + } + ] } }, - "jina-embeddings-v2-base-en": { + "GritLM-7B-noinstruct": { "BitextMining": { "f1": [] }, @@ -1096,59 +1204,25 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "jina-embeddings-v2-base-en", - "LEMBNarrativeQARetrieval": 37.89, - "LEMBQMSumRetrieval": 38.87, - "LEMBSummScreenFDRetrieval": 93.48, - "LEMBWikimQARetrieval": 73.99 - }, - { - "Model": "jina-embeddings-v2-base-en", - "LEMBNeedleRetrieval": 54.25, - "LEMBPasskeyRetrieval": 50.25 - } - ] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "use-cmlm-multilingual": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [ - { - "Model": "use-cmlm-multilingual", - "BlurbsClusteringP2P": 29.63, - "BlurbsClusteringS2S": 15.24, - "TenKGnadClusteringP2P": 37.1, - "TenKGnadClusteringS2S": 25.64 + "Model": "GritLM-7B-noinstruct", + "ARCChallenge": 16.57, + "AlphaNLI": 29.56, + "HellaSwag": 36.03, + "PIQA": 35.8, + "Quail": 8.68, + "RARbCode": 83.14, + "RARbMath": 83.01, + "SIQA": 5.73, + "SpartQA": 1.56, + "TempReasonL1": 2.57, + "TempReasonL2Fact": 48.25, + "TempReasonL2Pure": 8.98, + "TempReasonL3Fact": 34.11, + "TempReasonL3Pure": 12.44, + "WinoGrande": 52.12 } ] }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, "STS": { "cosine_spearman": [] }, @@ -1162,239 +1236,392 @@ "p-MRR": [] } }, - "deberta-v1-base": { + "LASER2": { "BitextMining": { "f1": [ { - "Model": "deberta-v1-base", - "Tatoeba (rus-Cyrl_eng-Latn)": 13.21 + "Model": "LASER2", + "BUCC": 97.7, + "Tatoeba": 95.41 } ] }, "Classification": { "accuracy": [ { - "Model": "deberta-v1-base", - "GeoreviewClassification (rus-Cyrl)": 40.19, - "HeadlineClassification (rus-Cyrl)": 78.75, - "InappropriatenessClassification (rus-Cyrl)": 61.33, - "KinopoiskClassification (rus-Cyrl)": 48.78, - "MassiveIntentClassification (rus-Cyrl)": 61.32, - "MassiveScenarioClassification (rus-Cyrl)": 64.71, - "RuReviewsClassification (rus-Cyrl)": 55.66, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 53.53, - "RuSciBenchOECDClassification (rus-Cyrl)": 41.34 + "Model": "LASER2", + "AmazonCounterfactualClassification": 68.76, + "AmazonPolarityClassification": 61.01, + "AmazonReviewsClassification": 30.89, + "Banking77Classification": 57.76, + "EmotionClassification": 24.83, + "ImdbClassification": 57.58, + "MTOPDomainClassification": 72.68, + "MTOPIntentClassification": 50.07, + "MasakhaNEWSClassification": 65.9, + "MassiveIntentClassification": 32.93, + "MassiveScenarioClassification": 42.32, + "ToxicConversationsClassification": 54.05, + "TweetSentimentExtractionClassification": 48.73 } ] }, "Clustering": { "v_measure": [ { - "Model": "deberta-v1-base", - "GeoreviewClusteringP2P (rus-Cyrl)": 58.79, - "MLSUMClusteringP2P (rus-Cyrl)": 47.33, - "MLSUMClusteringS2S (rus-Cyrl)": 44.6, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 36.66, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 33.31 + "Model": "LASER2", + "AlloProfClusteringP2P": 48.45, + "AlloProfClusteringS2S": 25.81, + "ArxivClusteringP2P": 17.77, + "ArxivClusteringS2S": 12.39, + "BiorxivClusteringP2P": 12.4, + "BiorxivClusteringS2S": 8.83, + "HALClusteringS2S": 11.52, + "MLSUMClusteringP2P": 34.53, + "MLSUMClusteringS2S": 27.35, + "MasakhaNEWSClusteringP2P": 32.04, + "MasakhaNEWSClusteringS2S": 29.77, + "MedrxivClusteringP2P": 17.91, + "MedrxivClusteringS2S": 16.63, + "RedditClustering": 9.96, + "RedditClusteringP2P": 26.42, + "StackExchangeClustering": 15.79, + "StackExchangeClusteringP2P": 18.63, + "TwentyNewsgroupsClustering": 11.38 } ] }, "PairClassification": { "max_ap": [ { - "Model": "deberta-v1-base", - "OpusparcusPC (rus-Cyrl)": 83.31, - "TERRa (rus-Cyrl)": 53.78 + "Model": "LASER2", + "OpusparcusPC": 93.77, + "PawsXPairClassification": 69.53, + "SprintDuplicateQuestions": 65.54, + "TwitterSemEval2015": 59.57, + "TwitterURLCorpus": 81.47 }, { - "Model": "deberta-v1-base", - "OpusparcusPC (rus-Cyrl)": 83.69, - "TERRa (rus-Cyrl)": 56.49 + "Model": "LASER2", + "OpusparcusPC": 93.77, + "PawsXPairClassification": 70.31, + "SprintDuplicateQuestions": 68.48, + "TwitterSemEval2015": 59.57, + "TwitterURLCorpus": 81.47 } ] }, "Reranking": { "map": [ { - "Model": "deberta-v1-base", - "RuBQReranking (rus-Cyrl)": 34.01 + "Model": "LASER2", + "AlloprofReranking": 35.29, + "AskUbuntuDupQuestions": 48.99, + "MindSmallReranking": 24.79, + "SciDocsRR": 54.99, + "StackOverflowDupQuestions": 36.98, + "SyntecReranking": 55.93 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "deberta-v1-base", - "RiaNewsRetrieval (rus-Cyrl)": 4.84, - "RuBQRetrieval (rus-Cyrl)": 10.15 + "Model": "LASER2", + "AlloprofRetrieval": 3.1, + "ArguAna": 12.86, + "BSARDRetrieval": 0.36, + "CQADupstackRetrieval": 4.12, + "ClimateFEVER": 0.36, + "DBPedia": 1.53, + "FEVER": 0.77, + "FiQA2018": 1.73, + "HotpotQA": 5.5, + "MSMARCO": 1.09, + "MintakaRetrieval": 6.31, + "NFCorpus": 2.44, + "NQ": 0.64, + "QuoraRetrieval": 71.14, + "SCIDOCS": 0.78, + "SciFact": 4.04, + "SyntecRetrieval": 28.58, + "TRECCOVID": 10.97, + "Touche2020": 1.06, + "XPQARetrieval": 42.59 } ] }, "STS": { "cosine_spearman": [ { - "Model": "deberta-v1-base", - "RUParaPhraserSTS (rus-Cyrl)": 54.03, - "RuSTSBenchmarkSTS (rus-Cyrl)": 58.47, - "STS22 (rus-Cyrl)": 47.67, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 58.45 + "Model": "LASER2", + "BIOSSES": 62.01, + "SICK-R": 62.86, + "SICKFr": 64.95, + "STS12": 62.6, + "STS13": 59.62, + "STS14": 57.03, + "STS15": 71.57, + "STS16": 70.75, + "STS17": 68.12, + "STS22": 46.19, + "STSBenchmark": 69.77, + "STSBenchmarkMultilingualSTS": 69.82 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [ + "cosine_spearman": [ { - "Model": "deberta-v1-base", - "CEDRClassification (rus-Cyrl)": 34.14, - "SensitiveTopicsClassification (rus-Cyrl)": 23.67 + "Model": "LASER2", + "SummEval": 26.8, + "SummEvalFr": 31.56 } ] }, + "MultilabelClassification": { + "accuracy": [] + }, "InstructionRetrieval": { "p-MRR": [] } }, - "rubert-tiny2": { + "LLM2Vec-Llama-2-supervised": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "rubert-tiny2", - "GeoreviewClassification (rus-Cyrl)": 39.64, - "HeadlineClassification (rus-Cyrl)": 74.19, - "InappropriatenessClassification (rus-Cyrl)": 58.57, - "KinopoiskClassification (rus-Cyrl)": 49.06, - "MassiveIntentClassification (rus-Cyrl)": 50.83, - "MassiveScenarioClassification (rus-Cyrl)": 59.15, - "RuReviewsClassification (rus-Cyrl)": 56.99, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 45.63, - "RuSciBenchOECDClassification (rus-Cyrl)": 35.48 - } + "Model": "LLM2Vec-Llama-2-supervised", + "AmazonCounterfactualClassification": 82.22, + "AmazonPolarityClassification": 89.69, + "AmazonReviewsClassification": 48.47, + "Banking77Classification": 88.17, + "EmotionClassification": 51.71, + "ImdbClassification": 85.78, + "MTOPDomainClassification": 95.57, + "MTOPIntentClassification": 82.81, + "MassiveIntentClassification": 78.06, + "MassiveScenarioClassification": 81.35, + "ToxicConversationsClassification": 71.01, + "TweetSentimentExtractionClassification": 61.11 + } ] }, "Clustering": { "v_measure": [ { - "Model": "rubert-tiny2", - "GeoreviewClusteringP2P (rus-Cyrl)": 41.58, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 39.78, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 35.98 + "Model": "LLM2Vec-Llama-2-supervised", + "ArxivClusteringP2P": 43.14, + "ArxivClusteringS2S": 42.38, + "BiorxivClusteringP2P": 35.88, + "BiorxivClusteringS2S": 34.81, + "MedrxivClusteringP2P": 32.23, + "MedrxivClusteringS2S": 31.37, + "RedditClustering": 61.1, + "RedditClusteringP2P": 64.52, + "StackExchangeClustering": 67.98, + "StackExchangeClusteringP2P": 33.2, + "TwentyNewsgroupsClustering": 51.04 } ] }, "PairClassification": { "max_ap": [ { - "Model": "rubert-tiny2", - "TERRa (rus-Cyrl)": 51.87 + "Model": "LLM2Vec-Llama-2-supervised", + "SprintDuplicateQuestions": 96.83, + "TwitterSemEval2015": 80.7, + "TwitterURLCorpus": 86.56 }, { - "Model": "rubert-tiny2", - "TERRa (rus-Cyrl)": 51.87 + "Model": "LLM2Vec-Llama-2-supervised", + "SprintDuplicateQuestions": 96.83, + "TwitterSemEval2015": 80.7, + "TwitterURLCorpus": 86.56 } ] }, "Reranking": { "map": [ { - "Model": "rubert-tiny2", - "MIRACLReranking (rus-Cyrl)": 15.81 - }, - { - "Model": "rubert-tiny2", - "RuBQReranking (rus-Cyrl)": 46.09 + "Model": "LLM2Vec-Llama-2-supervised", + "AskUbuntuDupQuestions": 63.13, + "MindSmallReranking": 31.34, + "SciDocsRR": 84.03, + "StackOverflowDupQuestions": 51.02 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "rubert-tiny2", - "MIRACLRetrieval (rus-Cyrl)": 1.89, - "RiaNewsRetrieval (rus-Cyrl)": 13.92, - "RuBQRetrieval (rus-Cyrl)": 10.87 + "Model": "LLM2Vec-Llama-2-supervised", + "ArguAna": 56.53, + "CQADupstackRetrieval": 45.94, + "ClimateFEVER": 30.7, + "DBPedia": 48.42, + "FEVER": 89.93, + "FiQA2018": 51.28, + "HotpotQA": 72.99, + "MSMARCO": 41.46, + "NFCorpus": 40.33, + "NQ": 61.24, + "QuoraRetrieval": 85.59, + "SCIDOCS": 21.05, + "SciFact": 77.3, + "TRECCOVID": 79.25, + "Touche2020": 16.92 } ] }, "STS": { "cosine_spearman": [ { - "Model": "rubert-tiny2", - "RUParaPhraserSTS (rus-Cyrl)": 65.14, - "RuSTSBenchmarkSTS (rus-Cyrl)": 69.43, - "STS22 (rus-Cyrl)": 50.23 + "Model": "LLM2Vec-Llama-2-supervised", + "BIOSSES": 82.13, + "SICK-R": 83.01, + "STS12": 78.85, + "STS13": 86.84, + "STS14": 84.04, + "STS15": 88.72, + "STS16": 86.79, + "STS17": 90.63, + "STS22": 67.55, + "STSBenchmark": 88.72 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [ + "cosine_spearman": [ { - "Model": "rubert-tiny2", - "CEDRClassification (rus-Cyrl)": 36.87, - "SensitiveTopicsClassification (rus-Cyrl)": 22.03 + "Model": "LLM2Vec-Llama-2-supervised", + "SummEval": 28.49 } ] }, + "MultilabelClassification": { + "accuracy": [] + }, "InstructionRetrieval": { "p-MRR": [] } }, - "DanskBERT": { + "LLM2Vec-Llama-2-unsupervised": { "BitextMining": { - "f1": [ - { - "Model": "DanskBERT", - "BornholmBitextMining": 6.34 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "DanskBERT", - "AngryTweetsClassification": 54.28, - "DKHateClassification": 59.3, - "DanishPoliticalCommentsClassification": 39.81, - "LccSentimentClassification": 58.0, - "MassiveIntentClassification (da)": 54.68, - "MassiveIntentClassification (nb)": 45.38, - "MassiveIntentClassification (sv)": 40.82, - "MassiveScenarioClassification (da)": 59.56, - "MassiveScenarioClassification (nb)": 47.55, - "MassiveScenarioClassification (sv)": 40.14, - "NoRecClassification": 46.06, - "NordicLangClassification": 74.25, - "NorwegianParliament": 56.79, - "ScalaDaClassification": 66.59, - "ScalaNbClassification": 59.99 + "Model": "LLM2Vec-Llama-2-unsupervised", + "AmazonCounterfactualClassification": 76.91, + "AmazonPolarityClassification": 79.05, + "AmazonReviewsClassification": 40.08, + "Banking77Classification": 84.65, + "EmotionClassification": 46.58, + "ImdbClassification": 75.68, + "MTOPDomainClassification": 94.33, + "MTOPIntentClassification": 79.54, + "MassiveIntentClassification": 73.84, + "MassiveScenarioClassification": 79.17, + "ToxicConversationsClassification": 71.81, + "TweetSentimentExtractionClassification": 57.17 } ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "ArxivClusteringP2P": 47.81, + "ArxivClusteringS2S": 40.53, + "BiorxivClusteringP2P": 38.12, + "BiorxivClusteringS2S": 31.25, + "MedrxivClusteringP2P": 30.94, + "MedrxivClusteringS2S": 28.04, + "RedditClustering": 42.84, + "RedditClusteringP2P": 60.1, + "StackExchangeClustering": 65.12, + "StackExchangeClusteringP2P": 33.61, + "TwentyNewsgroupsClustering": 30.76 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "SprintDuplicateQuestions": 87.57, + "TwitterSemEval2015": 65.14, + "TwitterURLCorpus": 80.94 + }, + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "SprintDuplicateQuestions": 87.57, + "TwitterSemEval2015": 65.14, + "TwitterURLCorpus": 80.94 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "AskUbuntuDupQuestions": 55.56, + "MindSmallReranking": 30.86, + "SciDocsRR": 77.62, + "StackOverflowDupQuestions": 47.77 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "ArguAna": 47.09, + "CQADupstackRetrieval": 30.78, + "ClimateFEVER": 20.67, + "DBPedia": 25.81, + "FEVER": 43.48, + "FiQA2018": 24.62, + "HotpotQA": 48.46, + "MSMARCO": 18.81, + "NFCorpus": 26.81, + "NQ": 33.21, + "QuoraRetrieval": 86.15, + "SCIDOCS": 10.0, + "SciFact": 64.48, + "TRECCOVID": 60.67, + "Touche2020": 10.18 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "BIOSSES": 82.41, + "SICK-R": 71.77, + "STS12": 65.39, + "STS13": 79.26, + "STS14": 72.98, + "STS15": 82.72, + "STS16": 81.02, + "STS17": 86.7, + "STS22": 63.47, + "STSBenchmark": 78.32 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "LLM2Vec-Llama-2-unsupervised", + "SummEval": 31.38 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -1403,928 +1630,121 @@ "p-MRR": [] } }, - "paraphrase-multilingual-mpnet-base-v2": { + "LLM2Vec-Meta-Llama-3-supervised": { "BitextMining": { - "f1": [ + "f1": [] + }, + "Classification": { + "accuracy": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "BUCC (de-en)": 98.59, - "BUCC (fr-en)": 96.89, - "BUCC (ru-en)": 96.44, - "BUCC (zh-en)": 97.56, - "BornholmBitextMining (dan-Latn)": 18.18, - "Tatoeba (rus-Cyrl_eng-Latn)": 92.92, - "Tatoeba (slv-Latn_eng-Latn)": 97.08, - "Tatoeba (fin-Latn_eng-Latn)": 95.92, - "Tatoeba (arq-Arab_eng-Latn)": 19.84, - "Tatoeba (ukr-Cyrl_eng-Latn)": 92.67, - "Tatoeba (csb-Latn_eng-Latn)": 23.73, - "Tatoeba (uzb-Latn_eng-Latn)": 23.19, - "Tatoeba (tgl-Latn_eng-Latn)": 17.67, - "Tatoeba (nno-Latn_eng-Latn)": 81.41, - "Tatoeba (xho-Latn_eng-Latn)": 6.53, - "Tatoeba (jpn-Jpan_eng-Latn)": 92.51, - "Tatoeba (kor-Hang_eng-Latn)": 93.07, - "Tatoeba (tel-Telu_eng-Latn)": 79.73, - "Tatoeba (slk-Latn_eng-Latn)": 96.62, - "Tatoeba (gsw-Latn_eng-Latn)": 25.12, - "Tatoeba (ceb-Latn_eng-Latn)": 7.39, - "Tatoeba (pes-Arab_eng-Latn)": 93.47, - "Tatoeba (cor-Latn_eng-Latn)": 3.53, - "Tatoeba (mal-Mlym_eng-Latn)": 88.46, - "Tatoeba (deu-Latn_eng-Latn)": 97.73, - "Tatoeba (glg-Latn_eng-Latn)": 95.32, - "Tatoeba (isl-Latn_eng-Latn)": 59.25, - "Tatoeba (tuk-Latn_eng-Latn)": 14.91, - "Tatoeba (ido-Latn_eng-Latn)": 43.91, - "Tatoeba (gle-Latn_eng-Latn)": 16.85, - "Tatoeba (ron-Latn_eng-Latn)": 96.43, - "Tatoeba (epo-Latn_eng-Latn)": 55.12, - "Tatoeba (tat-Cyrl_eng-Latn)": 10.89, - "Tatoeba (war-Latn_eng-Latn)": 7.42, - "Tatoeba (hye-Armn_eng-Latn)": 94.38, - "Tatoeba (arz-Arab_eng-Latn)": 55.69, - "Tatoeba (nob-Latn_eng-Latn)": 98.53, - "Tatoeba (amh-Ethi_eng-Latn)": 53.49, - "Tatoeba (dtp-Latn_eng-Latn)": 5.03, - "Tatoeba (lvs-Latn_eng-Latn)": 97.53, - "Tatoeba (tam-Taml_eng-Latn)": 73.6, - "Tatoeba (ben-Beng_eng-Latn)": 64.9, - "Tatoeba (hin-Deva_eng-Latn)": 97.75, - "Tatoeba (kat-Geor_eng-Latn)": 95.46, - "Tatoeba (fao-Latn_eng-Latn)": 38.24, - "Tatoeba (gla-Latn_eng-Latn)": 4.72, - "Tatoeba (urd-Arab_eng-Latn)": 95.12, - "Tatoeba (mar-Deva_eng-Latn)": 93.83, - "Tatoeba (bul-Cyrl_eng-Latn)": 93.52, - "Tatoeba (afr-Latn_eng-Latn)": 72.96, - "Tatoeba (swg-Latn_eng-Latn)": 22.8, - "Tatoeba (ber-Tfng_eng-Latn)": 4.88, - "Tatoeba (bos-Latn_eng-Latn)": 94.02, - "Tatoeba (nld-Latn_eng-Latn)": 95.5, - "Tatoeba (fra-Latn_eng-Latn)": 93.12, - "Tatoeba (hrv-Latn_eng-Latn)": 97.0, - "Tatoeba (dsb-Latn_eng-Latn)": 36.85, - "Tatoeba (lat-Latn_eng-Latn)": 24.25, - "Tatoeba (vie-Latn_eng-Latn)": 97.23, - "Tatoeba (max-Deva_eng-Latn)": 48.77, - "Tatoeba (uig-Arab_eng-Latn)": 48.35, - "Tatoeba (srp-Cyrl_eng-Latn)": 94.12, - "Tatoeba (nov-Latn_eng-Latn)": 50.23, - "Tatoeba (ita-Latn_eng-Latn)": 93.76, - "Tatoeba (swe-Latn_eng-Latn)": 95.45, - "Tatoeba (kur-Latn_eng-Latn)": 61.44, - "Tatoeba (ell-Grek_eng-Latn)": 94.93, - "Tatoeba (orv-Cyrl_eng-Latn)": 23.77, - "Tatoeba (nds-Latn_eng-Latn)": 38.88, - "Tatoeba (mkd-Cyrl_eng-Latn)": 93.02, - "Tatoeba (ile-Latn_eng-Latn)": 60.36, - "Tatoeba (jav-Latn_eng-Latn)": 23.39, - "Tatoeba (mon-Cyrl_eng-Latn)": 96.14, - "Tatoeba (tzl-Latn_eng-Latn)": 34.21, - "Tatoeba (cmn-Hans_eng-Latn)": 95.83, - "Tatoeba (pms-Latn_eng-Latn)": 34.19, - "Tatoeba (cha-Latn_eng-Latn)": 12.59, - "Tatoeba (yue-Hant_eng-Latn)": 77.58, - "Tatoeba (lit-Latn_eng-Latn)": 95.37, - "Tatoeba (oci-Latn_eng-Latn)": 43.49, - "Tatoeba (tha-Thai_eng-Latn)": 95.99, - "Tatoeba (khm-Khmr_eng-Latn)": 58.8, - "Tatoeba (ina-Latn_eng-Latn)": 84.32, - "Tatoeba (lfn-Latn_eng-Latn)": 49.56, - "Tatoeba (est-Latn_eng-Latn)": 98.4, - "Tatoeba (tur-Latn_eng-Latn)": 96.17, - "Tatoeba (kzj-Latn_eng-Latn)": 5.88, - "Tatoeba (dan-Latn_eng-Latn)": 96.17, - "Tatoeba (wuu-Hans_eng-Latn)": 78.25, - "Tatoeba (cbk-Latn_eng-Latn)": 58.68, - "Tatoeba (bre-Latn_eng-Latn)": 6.42, - "Tatoeba (awa-Deva_eng-Latn)": 42.83, - "Tatoeba (kab-Latn_eng-Latn)": 1.41, - "Tatoeba (pam-Latn_eng-Latn)": 5.39, - "Tatoeba (por-Latn_eng-Latn)": 93.02, - "Tatoeba (fry-Latn_eng-Latn)": 43.54, - "Tatoeba (swh-Latn_eng-Latn)": 16.02, - "Tatoeba (yid-Hebr_eng-Latn)": 30.73, - "Tatoeba (sqi-Latn_eng-Latn)": 98.57, - "Tatoeba (hun-Latn_eng-Latn)": 94.18, - "Tatoeba (heb-Hebr_eng-Latn)": 88.26, - "Tatoeba (ara-Arab_eng-Latn)": 90.19, - "Tatoeba (cym-Latn_eng-Latn)": 22.31, - "Tatoeba (spa-Latn_eng-Latn)": 97.0, - "Tatoeba (bel-Cyrl_eng-Latn)": 79.94, - "Tatoeba (kaz-Cyrl_eng-Latn)": 61.49, - "Tatoeba (ind-Latn_eng-Latn)": 93.5, - "Tatoeba (hsb-Latn_eng-Latn)": 44.32, - "Tatoeba (ang-Latn_eng-Latn)": 16.72, - "Tatoeba (aze-Latn_eng-Latn)": 76.36, - "Tatoeba (pol-Latn_eng-Latn)": 96.95, - "Tatoeba (mhr-Cyrl_eng-Latn)": 7.57, - "Tatoeba (ast-Latn_eng-Latn)": 70.08, - "Tatoeba (cat-Latn_eng-Latn)": 96.05, - "Tatoeba (zsm-Latn_eng-Latn)": 95.8, - "Tatoeba (ces-Latn_eng-Latn)": 95.73, - "Tatoeba (eus-Latn_eng-Latn)": 31.33, - "Tatoeba (afr-eng)": 72.96, - "Tatoeba (amh-eng)": 53.49, - "Tatoeba (ang-eng)": 16.72, - "Tatoeba (ara-eng)": 90.19, - "Tatoeba (arq-eng)": 19.84, - "Tatoeba (arz-eng)": 55.69, - "Tatoeba (ast-eng)": 70.08, - "Tatoeba (awa-eng)": 42.83, - "Tatoeba (aze-eng)": 76.36, - "Tatoeba (bel-eng)": 79.94, - "Tatoeba (ben-eng)": 64.9, - "Tatoeba (ber-eng)": 4.88, - "Tatoeba (bos-eng)": 94.02, - "Tatoeba (bre-eng)": 6.42, - "Tatoeba (bul-eng)": 93.52, - "Tatoeba (cat-eng)": 96.05, - "Tatoeba (cbk-eng)": 58.68, - "Tatoeba (ceb-eng)": 7.39, - "Tatoeba (ces-eng)": 95.73, - "Tatoeba (cha-eng)": 12.59, - "Tatoeba (cmn-eng)": 95.83, - "Tatoeba (cor-eng)": 3.53, - "Tatoeba (csb-eng)": 23.73, - "Tatoeba (cym-eng)": 22.31, - "Tatoeba (dan-eng)": 96.17, - "Tatoeba (deu-eng)": 97.73, - "Tatoeba (dsb-eng)": 36.85, - "Tatoeba (dtp-eng)": 5.03, - "Tatoeba (ell-eng)": 94.93, - "Tatoeba (epo-eng)": 55.12, - "Tatoeba (est-eng)": 98.4, - "Tatoeba (eus-eng)": 31.33, - "Tatoeba (fao-eng)": 38.24, - "Tatoeba (fin-eng)": 95.92, - "Tatoeba (fra-eng)": 93.12, - "Tatoeba (fry-eng)": 43.54, - "Tatoeba (gla-eng)": 4.72, - "Tatoeba (gle-eng)": 16.85, - "Tatoeba (glg-eng)": 95.32, - "Tatoeba (gsw-eng)": 25.12, - "Tatoeba (heb-eng)": 88.26, - "Tatoeba (hin-eng)": 97.75, - "Tatoeba (hrv-eng)": 97.0, - "Tatoeba (hsb-eng)": 44.32, - "Tatoeba (hun-eng)": 94.18, - "Tatoeba (hye-eng)": 94.38, - "Tatoeba (ido-eng)": 43.91, - "Tatoeba (ile-eng)": 60.36, - "Tatoeba (ina-eng)": 84.32, - "Tatoeba (ind-eng)": 93.5, - "Tatoeba (isl-eng)": 59.25, - "Tatoeba (ita-eng)": 93.76, - "Tatoeba (jav-eng)": 23.39, - "Tatoeba (jpn-eng)": 92.51, - "Tatoeba (kab-eng)": 1.41, - "Tatoeba (kat-eng)": 95.46, - "Tatoeba (kaz-eng)": 61.49, - "Tatoeba (khm-eng)": 58.8, - "Tatoeba (kor-eng)": 93.07, - "Tatoeba (kur-eng)": 61.44, - "Tatoeba (kzj-eng)": 5.88, - "Tatoeba (lat-eng)": 24.25, - "Tatoeba (lfn-eng)": 49.56, - "Tatoeba (lit-eng)": 95.37, - "Tatoeba (lvs-eng)": 97.53, - "Tatoeba (mal-eng)": 88.46, - "Tatoeba (mar-eng)": 93.83, - "Tatoeba (max-eng)": 48.77, - "Tatoeba (mhr-eng)": 7.57, - "Tatoeba (mkd-eng)": 93.02, - "Tatoeba (mon-eng)": 96.14, - "Tatoeba (nds-eng)": 38.88, - "Tatoeba (nld-eng)": 95.5, - "Tatoeba (nno-eng)": 81.41, - "Tatoeba (nob-eng)": 98.53, - "Tatoeba (nov-eng)": 50.23, - "Tatoeba (oci-eng)": 43.49, - "Tatoeba (orv-eng)": 23.77, - "Tatoeba (pam-eng)": 5.39, - "Tatoeba (pes-eng)": 93.47, - "Tatoeba (pms-eng)": 34.19, - "Tatoeba (pol-eng)": 96.95, - "Tatoeba (por-eng)": 93.02, - "Tatoeba (ron-eng)": 96.43, - "Tatoeba (rus-eng)": 92.92, - "Tatoeba (slk-eng)": 96.62, - "Tatoeba (slv-eng)": 97.08, - "Tatoeba (spa-eng)": 97.0, - "Tatoeba (sqi-eng)": 98.57, - "Tatoeba (srp-eng)": 94.12, - "Tatoeba (swe-eng)": 95.45, - "Tatoeba (swg-eng)": 22.8, - "Tatoeba (swh-eng)": 16.02, - "Tatoeba (tam-eng)": 73.6, - "Tatoeba (tat-eng)": 10.89, - "Tatoeba (tel-eng)": 79.73, - "Tatoeba (tgl-eng)": 17.67, - "Tatoeba (tha-eng)": 95.99, - "Tatoeba (tuk-eng)": 14.91, - "Tatoeba (tur-eng)": 96.17, - "Tatoeba (tzl-eng)": 34.21, - "Tatoeba (uig-eng)": 48.35, - "Tatoeba (ukr-eng)": 92.67, - "Tatoeba (urd-eng)": 95.12, - "Tatoeba (uzb-eng)": 23.19, - "Tatoeba (vie-eng)": 97.23, - "Tatoeba (war-eng)": 7.42, - "Tatoeba (wuu-eng)": 78.25, - "Tatoeba (xho-eng)": 6.53, - "Tatoeba (yid-eng)": 30.73, - "Tatoeba (yue-eng)": 77.58, - "Tatoeba (zsm-eng)": 95.8 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "AllegroReviews (pol-Latn)": 33.89, - "AllegroReviews": 33.86, - "AmazonCounterfactualClassification (en-ext)": 76.23, - "AmazonCounterfactualClassification (en)": 75.81, - "AmazonCounterfactualClassification (deu-Latn)": 69.96, - "AmazonCounterfactualClassification (jpn-Jpan)": 69.78, - "AmazonCounterfactualClassification (de)": 69.95, - "AmazonCounterfactualClassification (ja)": 69.79, - "AmazonPolarityClassification": 76.41, - "AmazonReviewsClassification (en)": 38.51, - "AmazonReviewsClassification (deu-Latn)": 39.53, - "AmazonReviewsClassification (spa-Latn)": 39.97, - "AmazonReviewsClassification (fra-Latn)": 38.98, - "AmazonReviewsClassification (jpn-Jpan)": 36.65, - "AmazonReviewsClassification (cmn-Hans)": 37.74, - "AmazonReviewsClassification (de)": 39.52, - "AmazonReviewsClassification (es)": 39.99, - "AmazonReviewsClassification (fr)": 39.0, - "AmazonReviewsClassification (ja)": 36.64, - "AmazonReviewsClassification (zh)": 37.74, - "AngryTweetsClassification (dan-Latn)": 54.84, - "Banking77Classification": 81.07, - "CBD (pol-Latn)": 64.97, - "CBD": 65.0, - "DanishPoliticalCommentsClassification (dan-Latn)": 40.96, - "EmotionClassification": 45.83, - "GeoreviewClassification (rus-Cyrl)": 42.33, - "HeadlineClassification (rus-Cyrl)": 70.35, - "IFlyTek (cmn-Hans)": 43.98, - "ImdbClassification": 64.57, - "InappropriatenessClassification (rus-Cyrl)": 59.32, - "JDReview (cmn-Hans)": 70.34, - "KinopoiskClassification (rus-Cyrl)": 44.31, - "LccSentimentClassification (dan-Latn)": 58.4, - "MTOPDomainClassification (en)": 89.24, - "MTOPDomainClassification (deu-Latn)": 85.73, - "MTOPDomainClassification (spa-Latn)": 86.98, - "MTOPDomainClassification (fra-Latn)": 81.21, - "MTOPDomainClassification (hin-Deva)": 84.76, - "MTOPDomainClassification (tha-Thai)": 82.51, - "MTOPDomainClassification (de)": 85.73, - "MTOPDomainClassification (es)": 86.96, - "MTOPDomainClassification (fr)": 81.21, - "MTOPDomainClassification (hi)": 84.76, - "MTOPDomainClassification (th)": 82.51, - "MTOPIntentClassification (en)": 68.69, - "MTOPIntentClassification (deu-Latn)": 61.26, - "MTOPIntentClassification (spa-Latn)": 66.6, - "MTOPIntentClassification (fra-Latn)": 59.75, - "MTOPIntentClassification (hin-Deva)": 62.38, - "MTOPIntentClassification (tha-Thai)": 64.77, - "MTOPIntentClassification (de)": 61.27, - "MTOPIntentClassification (es)": 66.59, - "MTOPIntentClassification (fr)": 59.76, - "MTOPIntentClassification (hi)": 62.37, - "MTOPIntentClassification (th)": 64.8, - "MasakhaNEWSClassification (amh-Ethi)": 78.83, - "MasakhaNEWSClassification (eng)": 75.39, - "MasakhaNEWSClassification (fra-Latn)": 72.94, - "MasakhaNEWSClassification (hau-Latn)": 54.49, - "MasakhaNEWSClassification (ibo-Latn)": 46.79, - "MasakhaNEWSClassification (lin-Latn)": 69.77, - "MasakhaNEWSClassification (lug-Latn)": 43.05, - "MasakhaNEWSClassification (orm-Ethi)": 41.97, - "MasakhaNEWSClassification (pcm-Latn)": 90.2, - "MasakhaNEWSClassification (run-Latn)": 49.97, - "MasakhaNEWSClassification (sna-Latn)": 59.78, - "MasakhaNEWSClassification (som-Latn)": 47.65, - "MasakhaNEWSClassification (swa-Latn)": 60.42, - "MasakhaNEWSClassification (tir-Ethi)": 45.04, - "MasakhaNEWSClassification (xho-Latn)": 48.82, - "MasakhaNEWSClassification (yor-Latn)": 58.3, - "MasakhaNEWSClassification (fra)": 78.1, - "MassiveIntentClassification (khm-Khmr)": 45.48, - "MassiveIntentClassification (swe-Latn)": 64.71, - "MassiveIntentClassification (mon-Cyrl)": 56.61, - "MassiveIntentClassification (jpn-Jpan)": 63.76, - "MassiveIntentClassification (cmo-Hant)": 62.33, - "MassiveIntentClassification (por-Latn)": 64.88, - "MassiveIntentClassification (nld-Latn)": 63.57, - "MassiveIntentClassification (fra-Latn)": 64.8, - "MassiveIntentClassification (isl-Latn)": 37.09, - "MassiveIntentClassification (hun-Latn)": 63.85, - "MassiveIntentClassification (aze-Latn)": 56.98, - "MassiveIntentClassification (tha-Thai)": 61.12, - "MassiveIntentClassification (tam-Taml)": 50.18, - "MassiveIntentClassification (slv-Latn)": 63.5, - "MassiveIntentClassification (ind-Latn)": 65.43, - "MassiveIntentClassification (hye-Armn)": 57.76, - "MassiveIntentClassification (fin-Latn)": 62.26, - "MassiveIntentClassification (heb-Hebr)": 58.25, - "MassiveIntentClassification (ara-Arab)": 51.43, - "MassiveIntentClassification (hin-Deva)": 62.79, - "MassiveIntentClassification (mya-Mymr)": 57.08, - "MassiveIntentClassification (ben-Beng)": 48.79, - "MassiveIntentClassification (kor-Kore)": 61.84, - "MassiveIntentClassification (ron-Latn)": 62.83, - "MassiveIntentClassification (afr-Latn)": 52.35, - "MassiveIntentClassification (tel-Telu)": 52.85, - "MassiveIntentClassification (fas-Arab)": 65.33, - "MassiveIntentClassification (kat-Geor)": 49.88, - "MassiveIntentClassification (sqi-Latn)": 62.49, - "MassiveIntentClassification (vie-Latn)": 59.71, - "MassiveIntentClassification (lav-Latn)": 61.29, - "MassiveIntentClassification (cmo-Hans)": 65.32, - "MassiveIntentClassification (cym-Latn)": 27.89, - "MassiveIntentClassification (dan-Latn)": 62.8, - "MassiveIntentClassification (spa-Latn)": 64.45, - "MassiveIntentClassification (swa-Latn)": 31.93, - "MassiveIntentClassification (jav-Latn)": 36.49, - "MassiveIntentClassification (en)": 69.32, - "MassiveIntentClassification (kan-Knda)": 50.62, - "MassiveIntentClassification (urd-Arab)": 56.36, - "MassiveIntentClassification (tur-Latn)": 64.58, - "MassiveIntentClassification (deu-Latn)": 59.56, - "MassiveIntentClassification (ita-Latn)": 64.69, - "MassiveIntentClassification (mal-Mlym)": 54.34, - "MassiveIntentClassification (msa-Latn)": 60.72, - "MassiveIntentClassification (pol-Latn)": 64.32, - "MassiveIntentClassification (nob-Latn)": 62.62, - "MassiveIntentClassification (rus-Cyrl)": 63.23, - "MassiveIntentClassification (amh-Ethi)": 41.56, - "MassiveIntentClassification (tgl-Latn)": 38.83, - "MassiveIntentClassification (ell-Grek)": 62.63, - "MassiveIntentClassification (pl)": 64.29, - "MassiveIntentClassification (fr)": 61.88, - "MassiveScenarioClassification (jav-Latn)": 44.22, - "MassiveScenarioClassification (kan-Knda)": 56.08, - "MassiveScenarioClassification (ara-Arab)": 57.79, - "MassiveScenarioClassification (mon-Cyrl)": 60.84, - "MassiveScenarioClassification (swa-Latn)": 37.26, - "MassiveScenarioClassification (hin-Deva)": 67.94, - "MassiveScenarioClassification (por-Latn)": 70.08, - "MassiveScenarioClassification (rus-Cyrl)": 69.92, - "MassiveScenarioClassification (cmo-Hant)": 68.71, - "MassiveScenarioClassification (fra-Latn)": 70.71, - "MassiveScenarioClassification (msa-Latn)": 65.85, - "MassiveScenarioClassification (aze-Latn)": 61.52, - "MassiveScenarioClassification (kat-Geor)": 57.3, - "MassiveScenarioClassification (fin-Latn)": 67.58, - "MassiveScenarioClassification (ind-Latn)": 70.73, - "MassiveScenarioClassification (nob-Latn)": 70.23, - "MassiveScenarioClassification (tha-Thai)": 69.44, - "MassiveScenarioClassification (mal-Mlym)": 60.14, - "MassiveScenarioClassification (ell-Grek)": 68.81, - "MassiveScenarioClassification (heb-Hebr)": 65.16, - "MassiveScenarioClassification (vie-Latn)": 65.7, - "MassiveScenarioClassification (afr-Latn)": 59.68, - "MassiveScenarioClassification (hun-Latn)": 70.31, - "MassiveScenarioClassification (tam-Taml)": 55.97, - "MassiveScenarioClassification (ita-Latn)": 69.74, - "MassiveScenarioClassification (sqi-Latn)": 69.62, - "MassiveScenarioClassification (fas-Arab)": 69.88, - "MassiveScenarioClassification (cym-Latn)": 35.27, - "MassiveScenarioClassification (deu-Latn)": 67.35, - "MassiveScenarioClassification (nld-Latn)": 70.37, - "MassiveScenarioClassification (slv-Latn)": 70.81, - "MassiveScenarioClassification (kor-Kore)": 68.52, - "MassiveScenarioClassification (lav-Latn)": 66.28, - "MassiveScenarioClassification (urd-Arab)": 62.92, - "MassiveScenarioClassification (isl-Latn)": 44.16, - "MassiveScenarioClassification (cmo-Hans)": 71.25, - "MassiveScenarioClassification (dan-Latn)": 71.04, - "MassiveScenarioClassification (tel-Telu)": 58.79, - "MassiveScenarioClassification (en)": 75.35, - "MassiveScenarioClassification (swe-Latn)": 71.6, - "MassiveScenarioClassification (pol-Latn)": 68.99, - "MassiveScenarioClassification (ron-Latn)": 67.94, - "MassiveScenarioClassification (hye-Armn)": 63.03, - "MassiveScenarioClassification (amh-Ethi)": 48.96, - "MassiveScenarioClassification (jpn-Jpan)": 69.68, - "MassiveScenarioClassification (tur-Latn)": 70.41, - "MassiveScenarioClassification (ben-Beng)": 54.52, - "MassiveScenarioClassification (spa-Latn)": 70.4, - "MassiveScenarioClassification (mya-Mymr)": 63.03, - "MassiveScenarioClassification (khm-Khmr)": 53.13, - "MassiveScenarioClassification (tgl-Latn)": 43.98, - "MassiveScenarioClassification (pl)": 68.98, - "MassiveScenarioClassification (fr)": 67.9, - "MultilingualSentiment (cmn-Hans)": 66.49, - "NoRecClassification (nob-Latn)": 50.32, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 41.57, - "OnlineShopping (cmn-Hans)": 87.75, - "PAC (pol-Latn)": 63.76, - "PAC": 63.76, - "PolEmo2.0-IN (pol-Latn)": 62.74, - "PolEmo2.0-IN": 62.78, - "PolEmo2.0-OUT (pol-Latn)": 19.92, - "PolEmo2.0-OUT": 19.98, - "RuReviewsClassification (rus-Cyrl)": 62.33, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.01, - "RuSciBenchOECDClassification (rus-Cyrl)": 44.14, - "TNews (cmn-Hans)": 43.73, - "ToxicConversationsClassification": 71.02, - "TweetSentimentExtractionClassification": 59.03, - "Waimai (cmn-Hans)": 83.97 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "AmazonCounterfactualClassification": 79.94, + "AmazonPolarityClassification": 86.07, + "AmazonReviewsClassification": 46.84, + "Banking77Classification": 88.05, + "EmotionClassification": 51.2, + "ImdbClassification": 82.94, + "MTOPDomainClassification": 96.14, + "MTOPIntentClassification": 86.11, + "MassiveIntentClassification": 79.8, + "MassiveScenarioClassification": 81.52, + "ToxicConversationsClassification": 70.59, + "TweetSentimentExtractionClassification": 61.9 } ] }, "Clustering": { "v_measure": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "8TagsClustering": 25.62, - "AlloProfClusteringP2P": 54.49, - "AlloProfClusteringS2S": 44.79, - "ArxivClusteringP2P": 37.78, - "ArxivClusteringS2S": 31.68, - "BiorxivClusteringP2P": 33.09, - "BiorxivClusteringS2S": 29.6, - "BlurbsClusteringP2P": 34.38, - "BlurbsClusteringS2S": 15.81, - "GeoreviewClusteringP2P (rus-Cyrl)": 56.18, - "HALClusteringS2S": 23.97, - "MLSUMClusteringP2P (rus-Cyrl)": 35.95, - "MLSUMClusteringP2P": 40.55, - "MLSUMClusteringS2S (rus-Cyrl)": 38.88, - "MLSUMClusteringS2S": 37.53, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 46.85, - "MasakhaNEWSClusteringP2P (eng)": 47.3, - "MasakhaNEWSClusteringP2P (fra-Latn)": 53.3, - "MasakhaNEWSClusteringP2P (hau-Latn)": 27.61, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 41.32, - "MasakhaNEWSClusteringP2P (lin-Latn)": 58.37, - "MasakhaNEWSClusteringP2P (lug-Latn)": 47.56, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 24.53, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 66.55, - "MasakhaNEWSClusteringP2P (run-Latn)": 51.97, - "MasakhaNEWSClusteringP2P (sna-Latn)": 45.55, - "MasakhaNEWSClusteringP2P (som-Latn)": 33.98, - "MasakhaNEWSClusteringP2P (swa-Latn)": 25.03, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 48.33, - "MasakhaNEWSClusteringP2P (xho-Latn)": 29.47, - "MasakhaNEWSClusteringP2P (yor-Latn)": 28.25, - "MasakhaNEWSClusteringP2P (fra)": 41.57, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 51.54, - "MasakhaNEWSClusteringS2S (eng)": 43.28, - "MasakhaNEWSClusteringS2S (fra-Latn)": 37.92, - "MasakhaNEWSClusteringS2S (hau-Latn)": 17.97, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 34.56, - "MasakhaNEWSClusteringS2S (lin-Latn)": 57.43, - "MasakhaNEWSClusteringS2S (lug-Latn)": 45.22, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 21.9, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 62.1, - "MasakhaNEWSClusteringS2S (run-Latn)": 46.81, - "MasakhaNEWSClusteringS2S (sna-Latn)": 43.15, - "MasakhaNEWSClusteringS2S (som-Latn)": 29.44, - "MasakhaNEWSClusteringS2S (swa-Latn)": 10.31, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 51.95, - "MasakhaNEWSClusteringS2S (xho-Latn)": 21.26, - "MasakhaNEWSClusteringS2S (yor-Latn)": 28.88, - "MasakhaNEWSClusteringS2S (fra)": 30.88, - "MedrxivClusteringP2P": 31.96, - "MedrxivClusteringS2S": 31.7, - "RedditClustering": 45.24, - "RedditClusteringP2P": 51.31, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 48.47, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 42.9, - "StackExchangeClustering": 52.98, - "StackExchangeClusteringP2P": 32.94, - "TenKGnadClusteringP2P": 35.96, - "TenKGnadClusteringS2S": 22.0, - "TwentyNewsgroupsClustering": 44.1 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "ArxivClusteringP2P": 44.27, + "ArxivClusteringS2S": 46.85, + "BiorxivClusteringP2P": 32.35, + "BiorxivClusteringS2S": 36.7, + "MedrxivClusteringP2P": 30.71, + "MedrxivClusteringS2S": 32.96, + "RedditClustering": 61.72, + "RedditClusteringP2P": 63.98, + "StackExchangeClustering": 72.74, + "StackExchangeClusteringP2P": 32.26, + "TwentyNewsgroupsClustering": 56.41 } ] }, "PairClassification": { "max_ap": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "CDSC-E (pol-Latn)": 75.77, - "OpusparcusPC (deu-Latn)": 97.34, - "OpusparcusPC (en)": 98.59, - "OpusparcusPC (fin-Latn)": 95.33, - "OpusparcusPC (fra-Latn)": 93.45, - "OpusparcusPC (rus-Cyrl)": 90.47, - "OpusparcusPC (swe-Latn)": 95.16, - "PSC (pol-Latn)": 98.26, - "PawsXPairClassification (deu-Latn)": 55.69, - "PawsXPairClassification (en)": 60.12, - "PawsXPairClassification (spa-Latn)": 56.94, - "PawsXPairClassification (fra-Latn)": 58.14, - "PawsXPairClassification (jpn-Hira)": 49.37, - "PawsXPairClassification (kor-Hang)": 50.66, - "PawsXPairClassification (cmn-Hans)": 55.47, - "SICK-E-PL (pol-Latn)": 77.22, - "SprintDuplicateQuestions": 90.55, - "TERRa (rus-Cyrl)": 64.57, - "TwitterSemEval2015": 66.75, - "TwitterURLCorpus": 85.14 - }, - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "CDSC-E (pol-Latn)": 75.77, - "CDSC-E": 75.76, - "OpusparcusPC (deu-Latn)": 97.34, - "OpusparcusPC (en)": 98.59, - "OpusparcusPC (fin-Latn)": 95.33, - "OpusparcusPC (fra-Latn)": 93.45, - "OpusparcusPC (rus-Cyrl)": 90.47, - "OpusparcusPC (swe-Latn)": 95.16, - "OpusparcusPC (fr)": 93.45, - "PPC": 93.67, - "PSC (pol-Latn)": 98.26, - "PSC": 98.26, - "PawsXPairClassification (deu-Latn)": 55.71, - "PawsXPairClassification (en)": 60.12, - "PawsXPairClassification (spa-Latn)": 56.94, - "PawsXPairClassification (fra-Latn)": 58.33, - "PawsXPairClassification (jpn-Hira)": 49.37, - "PawsXPairClassification (kor-Hang)": 50.78, - "PawsXPairClassification (cmn-Hans)": 55.47, - "PawsXPairClassification (fr)": 58.17, - "SICK-E-PL (pol-Latn)": 77.22, - "SICK-E-PL": 77.22, - "SprintDuplicateQuestions": 91.1, - "TERRa (rus-Cyrl)": 64.57, - "TwitterSemEval2015": 68.75, - "TwitterURLCorpus": 85.32 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "SprintDuplicateQuestions": 95.09, + "TwitterSemEval2015": 81.73, + "TwitterURLCorpus": 86.56 }, { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "CDSC-E": 75.76, - "OpusparcusPC (fr)": 93.45, - "PPC": 93.67, - "PSC": 98.26, - "PawsXPairClassification (fr)": 58.14, - "SICK-E-PL": 77.22, - "SprintDuplicateQuestions": 90.55, - "TwitterSemEval2015": 66.75, - "TwitterURLCorpus": 85.14 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "SprintDuplicateQuestions": 95.09, + "TwitterSemEval2015": 81.73, + "TwitterURLCorpus": 86.56 } ] }, "Reranking": { "map": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "AlloprofReranking (fra-Latn)": 67.2, - "AlloprofReranking": 54.34, - "AskUbuntuDupQuestions": 60.16, - "MMarcoReranking (cmn-Hans)": 14.57, - "MindSmallReranking": 30.15, - "RuBQReranking (rus-Cyrl)": 58.77, - "SciDocsRR": 78.09, - "StackOverflowDupQuestions": 46.79, - "SyntecReranking (fra-Latn)": 80.97, - "SyntecReranking": 83.23, - "T2Reranking (cmn-Hans)": 64.49 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "AskUbuntuDupQuestions": 65.19, + "MindSmallReranking": 32.67, + "SciDocsRR": 86.05, + "StackOverflowDupQuestions": 54.82 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "AILACasedocs": 17.45, - "AILAStatutes": 22.24, - "ARCChallenge": 7.19, - "AlloprofRetrieval (fra-Latn)": 30.8, - "AlloprofRetrieval": 30.8, - "AlphaNLI": 21.87, - "AppsRetrieval (eng-Latn_python-Code)": 2.34, - "ArguAna": 48.91, - "ArguAna-PL (pol-Latn)": 42.61, - "ArguAna-PL": 42.62, - "BSARDRetrieval (fra-Latn)": 13.19, - "BSARDRetrieval": 0.0, - "CQADupstackRetrieval": 31.32, - "ClimateFEVER": 15.27, - "CmedqaRetrieval (cmn-Hans)": 10.15, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 11.43, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 35.34, - "CodeSearchNetCCRetrieval (python-Code)": 53.43, - "CodeSearchNetCCRetrieval (javascript-Code)": 57.46, - "CodeSearchNetCCRetrieval (go-Code)": 26.63, - "CodeSearchNetCCRetrieval (ruby-Code)": 55.02, - "CodeSearchNetCCRetrieval (java-Code)": 42.1, - "CodeSearchNetCCRetrieval (php-Code)": 29.8, - "CodeSearchNetRetrieval (python-Code)": 65.54, - "CodeSearchNetRetrieval (javascript-Code)": 48.34, - "CodeSearchNetRetrieval (go-Code)": 67.23, - "CodeSearchNetRetrieval (ruby-Code)": 60.19, - "CodeSearchNetRetrieval (java-Code)": 45.56, - "CodeSearchNetRetrieval (php-Code)": 48.3, - "CodeTransOceanContest (python-Code_c++-Code)": 27.94, - "CodeTransOceanDL": 16.71, - "CosQA (eng-Latn_python-Code)": 16.57, - "CovidRetrieval (cmn-Hans)": 28.85, - "DBPedia": 26.22, - "DBPedia-PL": 20.18, - "DuRetrieval (cmn-Hans)": 33.41, - "EcomRetrieval (cmn-Hans)": 9.69, - "FEVER": 56.76, - "FiQA-PL (pol-Latn)": 14.71, - "FiQA-PL": 14.68, - "FiQA2018": 22.96, - "GerDaLIRSmall (deu-Latn)": 3.0, - "HellaSwag": 17.53, - "HotpotQA": 37.03, - "HotpotQA-PL": 29.36, - "LEMBNarrativeQARetrieval": 16.02, - "LEMBQMSumRetrieval": 12.23, - "LEMBSummScreenFDRetrieval": 41.15, - "LEMBWikimQARetrieval": 38.86, - "LeCaRDv2 (zho-Hans)": 33.91, - "LegalBenchConsumerContractsQA": 52.37, - "LegalBenchCorporateLobbying": 87.62, - "LegalQuAD (deu-Latn)": 17.8, - "LegalSummarization": 56.8, - "MMarcoRetrieval (cmn-Hans)": 44.62, - "MSMARCO": 26.6, - "MSMARCO-PL": 12.45, - "MedicalRetrieval (cmn-Hans)": 14.1, - "MintakaRetrieval (ara-Arab)": 14.55, - "MintakaRetrieval (deu-Latn)": 25.43, - "MintakaRetrieval (spa-Latn)": 24.94, - "MintakaRetrieval (fra-Latn)": 24.45, - "MintakaRetrieval (hin-Deva)": 18.67, - "MintakaRetrieval (ita-Latn)": 25.62, - "MintakaRetrieval (jpn-Hira)": 15.46, - "MintakaRetrieval (por-Latn)": 26.15, - "MintakaRetrieval (fr)": 24.45, - "NFCorpus": 25.49, - "NFCorpus-PL (pol-Latn)": 18.54, - "NFCorpus-PL": 18.53, - "NQ": 33.6, - "NQ-PL": 15.64, - "PIQA": 18.65, - "Quail": 2.98, - "Quora-PL": 79.18, - "QuoraRetrieval": 86.4, - "RARbCode": 11.02, - "RARbMath": 30.93, - "RiaNewsRetrieval (rus-Cyrl)": 51.75, - "RuBQRetrieval (rus-Cyrl)": 37.04, - "SCIDOCS": 13.97, - "SCIDOCS-PL (pol-Latn)": 11.17, - "SCIDOCS-PL": 11.18, - "SIQA": 1.21, - "SciFact": 50.3, - "SciFact-PL (pol-Latn)": 41.55, - "SciFact-PL": 41.53, - "SpartQA": 5.69, - "StackOverflowQA": 43.11, - "SyntecRetrieval (fra-Latn)": 76.0, - "SyntecRetrieval": 76.0, - "SyntheticText2SQL (eng-Latn_sql-Code)": 35.22, - "T2Retrieval (cmn-Hans)": 28.35, - "TRECCOVID": 37.87, - "TRECCOVID-PL (pol-Latn)": 35.43, - "TRECCOVID-PL": 35.38, - "TempReasonL1": 1.94, - "TempReasonL2Fact": 5.34, - "TempReasonL2Pure": 0.33, - "TempReasonL3Fact": 6.79, - "TempReasonL3Pure": 3.19, - "Touche2020": 17.4, - "VideoRetrieval (cmn-Hans)": 14.18, - "WinoGrande": 49.01, - "XPQARetrieval (ara-Arab_ara-Arab)": 24.86, - "XPQARetrieval (eng-Latn_ara-Arab)": 19.6, - "XPQARetrieval (ara-Arab_eng-Latn)": 28.21, - "XPQARetrieval (deu-Latn_deu-Latn)": 48.81, - "XPQARetrieval (eng-Latn_deu-Latn)": 31.93, - "XPQARetrieval (deu-Latn_eng-Latn)": 53.26, - "XPQARetrieval (spa-Latn_spa-Latn)": 41.08, - "XPQARetrieval (eng-Latn_spa-Latn)": 30.05, - "XPQARetrieval (spa-Latn_eng-Latn)": 43.4, - "XPQARetrieval (fra-Latn_fra-Latn)": 46.22, - "XPQARetrieval (eng-Latn_fra-Latn)": 29.55, - "XPQARetrieval (fra-Latn_eng-Latn)": 47.3, - "XPQARetrieval (hin-Deva_hin-Deva)": 50.74, - "XPQARetrieval (eng-Latn_hin-Deva)": 24.97, - "XPQARetrieval (hin-Deva_eng-Latn)": 49.24, - "XPQARetrieval (ita-Latn_ita-Latn)": 52.87, - "XPQARetrieval (eng-Latn_ita-Latn)": 33.44, - "XPQARetrieval (ita-Latn_eng-Latn)": 51.49, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 53.17, - "XPQARetrieval (eng-Latn_jpn-Hira)": 26.66, - "XPQARetrieval (jpn-Hira_eng-Latn)": 49.86, - "XPQARetrieval (kor-Hang_kor-Hang)": 24.88, - "XPQARetrieval (eng-Latn_kor-Hang)": 24.5, - "XPQARetrieval (kor-Hang_eng-Latn)": 24.61, - "XPQARetrieval (pol-Latn_pol-Latn)": 29.36, - "XPQARetrieval (eng-Latn_pol-Latn)": 20.48, - "XPQARetrieval (pol-Latn_eng-Latn)": 29.31, - "XPQARetrieval (por-Latn_por-Latn)": 34.3, - "XPQARetrieval (eng-Latn_por-Latn)": 21.72, - "XPQARetrieval (por-Latn_eng-Latn)": 37.65, - "XPQARetrieval (tam-Taml_tam-Taml)": 19.8, - "XPQARetrieval (eng-Latn_tam-Taml)": 13.93, - "XPQARetrieval (tam-Taml_eng-Latn)": 18.26, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 42.54, - "XPQARetrieval (eng-Latn_cmn-Hans)": 20.91, - "XPQARetrieval (cmn-Hans_eng-Latn)": 42.81, - "XPQARetrieval (fr)": 46.22 - }, - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "LEMBNeedleRetrieval": 14.0, - "LEMBPasskeyRetrieval": 7.75 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "ArguAna": 62.78, + "CQADupstackRetrieval": 48.25, + "ClimateFEVER": 34.27, + "DBPedia": 48.34, + "FEVER": 90.2, + "FiQA2018": 55.33, + "HotpotQA": 71.76, + "MSMARCO": 43.24, + "NFCorpus": 41.83, + "NQ": 64.21, + "QuoraRetrieval": 87.16, + "SCIDOCS": 22.96, + "SciFact": 78.22, + "TRECCOVID": 80.34, + "Touche2020": 20.5 } ] }, "STS": { "cosine_spearman": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "AFQMC (cmn-Hans)": 15.69, - "ATEC (cmn-Hans)": 20.27, - "BIOSSES": 76.27, - "BQ (cmn-Hans)": 36.33, - "CDSC-R (pol-Latn)": 88.8, - "LCQMC (cmn-Hans)": 63.3, - "PAWSX (cmn-Hans)": 12.16, - "RUParaPhraserSTS (rus-Cyrl)": 65.74, - "RuSTSBenchmarkSTS (rus-Cyrl)": 82.46, - "SICK-R": 79.62, - "SICK-R-PL (pol-Latn)": 73.13, - "SICKFr (fra-Latn)": 75.56, - "STS12": 77.9, - "STS13": 85.11, - "STS14": 80.81, - "STS15": 87.48, - "STS16": 83.2, - "STS17 (kor-Hang)": 83.41, - "STS17 (nld-Latn_eng-Latn)": 82.51, - "STS17 (eng-Latn_tur-Latn)": 74.9, - "STS17 (eng-Latn_ara-Arab)": 80.85, - "STS17 (spa-Latn_eng-Latn)": 86.11, - "STS17 (fra-Latn_eng-Latn)": 81.17, - "STS17 (ara-Arab)": 79.1, - "STS17 (ita-Latn_eng-Latn)": 84.24, - "STS17 (eng-Latn_deu-Latn)": 83.28, - "STS17 (spa-Latn)": 85.14, - "STS17 (en-en)": 86.99, - "STS22 (rus-Cyrl)": 58.74, - "STS22 (spa-Latn_eng-Latn)": 70.26, - "STS22 (spa-Latn)": 59.91, - "STS22 (tur-Latn)": 56.3, - "STS22 (ita-Latn)": 60.65, - "STS22 (ara-Arab)": 52.19, - "STS22 (pol-Latn)": 33.65, - "STS22 (deu-Latn)": 46.7, - "STS22 (cmn-Hans)": 61.75, - "STS22 (fra-Latn)": 74.3, - "STS22 (deu-Latn_eng-Latn)": 50.81, - "STS22 (pol-Latn_eng-Latn)": 73.07, - "STS22 (en)": 63.52, - "STS22 (spa-Latn_ita-Latn)": 53.7, - "STS22 (deu-Latn_fra-Latn)": 62.34, - "STS22 (deu-Latn_pol-Latn)": 40.53, - "STS22 (cmn-Hans_eng-Latn)": 67.96, - "STS22 (fra-Latn_pol-Latn)": 84.52, - "STSB (cmn-Hans)": 80.84, - "STSBenchmark": 86.82, - "STSBenchmarkMultilingualSTS (spa-Latn)": 84.61, - "STSBenchmarkMultilingualSTS (por-Latn)": 84.0, - "STSBenchmarkMultilingualSTS (nld-Latn)": 83.36, - "STSBenchmarkMultilingualSTS (deu-Latn)": 83.56, - "STSBenchmarkMultilingualSTS (pol-Latn)": 81.46, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 82.45, - "STSBenchmarkMultilingualSTS (fra-Latn)": 84.69, - "STSBenchmarkMultilingualSTS (en)": 86.82, - "STSBenchmarkMultilingualSTS (ita-Latn)": 84.09, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.98 - }, - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "BIOSSES": 76.27, - "CDSC-R": 88.8, - "SICK-R": 79.62, - "SICK-R-PL": 73.13, - "SICKFr": 75.56, - "STS12": 77.9, - "STS13": 85.11, - "STS14": 80.81, - "STS15": 87.48, - "STS16": 83.2, - "STS17 (ar-ar)": 79.1, - "STS17 (en-ar)": 80.85, - "STS17 (en-de)": 83.28, - "STS17 (en-en)": 86.99, - "STS17 (en-tr)": 74.9, - "STS17 (es-en)": 86.11, - "STS17 (es-es)": 85.14, - "STS17 (fr-en)": 81.17, - "STS17 (it-en)": 84.24, - "STS17 (ko-ko)": 83.41, - "STS17 (nl-en)": 82.51, - "STS22 (pl)": 33.64, - "STS22 (fr)": 74.3, - "STSBenchmark": 86.82, - "STSBenchmarkMultilingualSTS (fr)": 84.69 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "BIOSSES": 84.92, + "SICK-R": 83.94, + "STS12": 79.27, + "STS13": 84.83, + "STS14": 82.94, + "STS15": 88.09, + "STS16": 86.54, + "STS17": 89.58, + "STS22": 67.67, + "STSBenchmark": 88.05 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "SummEval": 31.57, - "SummEvalFr (fra-Latn)": 29.47 - }, - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "SummEval": 31.57, - "SummEvalFr (fra-Latn)": 29.47 - }, - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "SummEval": 31.57, - "SummEvalFr": 29.47 - } - ] - }, - "MultilabelClassification": { - "accuracy": [ - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "CEDRClassification (rus-Cyrl)": 39.98, - "SensitiveTopicsClassification (rus-Cyrl)": 25.83 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "paraphrase-multilingual-mpnet-base-v2", - "Core17InstructionRetrieval": -0.04, - "News21InstructionRetrieval": 0.61, - "Robust04InstructionRetrieval": -3.99 - } - ] - } - }, - "all-MiniLM-L6-v2-instruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "all-MiniLM-L6-v2-instruct", - "ARCChallenge": 9.4, - "AlphaNLI": 15.09, - "HellaSwag": 20.51, - "PIQA": 24.68, - "Quail": 3.46, - "RARbCode": 42.47, - "RARbMath": 62.39, - "SIQA": 1.53, - "SpartQA": 0.57, - "TempReasonL1": 1.05, - "TempReasonL2Fact": 16.57, - "TempReasonL2Pure": 0.49, - "TempReasonL3Fact": 14.01, - "TempReasonL3Pure": 6.27, - "WinoGrande": 20.73 + "Model": "LLM2Vec-Meta-Llama-3-supervised", + "SummEval": 30.94 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, @@ -2332,94 +1752,120 @@ "p-MRR": [] } }, - "bge-large-zh-noinstruct": { + "LLM2Vec-Meta-Llama-3-unsupervised": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "bge-large-zh-noinstruct", - "AmazonReviewsClassification (zh)": 41.94, - "IFlyTek": 45.32, - "JDReview": 85.38, - "MassiveIntentClassification (zh-CN)": 66.96, - "MassiveScenarioClassification (zh-CN)": 73.39, - "MultilingualSentiment": 73.7, - "OnlineShopping": 91.66, - "TNews": 52.05, - "Waimai": 86.83 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "AmazonCounterfactualClassification": 75.7, + "AmazonPolarityClassification": 80.68, + "AmazonReviewsClassification": 40.0, + "Banking77Classification": 84.77, + "EmotionClassification": 47.08, + "ImdbClassification": 75.19, + "MTOPDomainClassification": 94.47, + "MTOPIntentClassification": 81.09, + "MassiveIntentClassification": 75.01, + "MassiveScenarioClassification": 79.16, + "ToxicConversationsClassification": 71.85, + "TweetSentimentExtractionClassification": 57.61 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-large-zh-noinstruct", - "CLSClusteringP2P": 41.23, - "CLSClusteringS2S": 40.04, - "ThuNewsClusteringP2P": 62.03, - "ThuNewsClusteringS2S": 56.75 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "ArxivClusteringP2P": 49.22, + "ArxivClusteringS2S": 41.71, + "BiorxivClusteringP2P": 38.39, + "BiorxivClusteringS2S": 31.31, + "MedrxivClusteringP2P": 31.47, + "MedrxivClusteringS2S": 27.87, + "RedditClustering": 43.67, + "RedditClusteringP2P": 61.67, + "StackExchangeClustering": 68.2, + "StackExchangeClusteringP2P": 36.36, + "TwentyNewsgroupsClustering": 32.01 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bge-large-zh-noinstruct", - "Cmnli": 82.17, - "Ocnli": 71.37 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "SprintDuplicateQuestions": 88.14, + "TwitterSemEval2015": 66.6, + "TwitterURLCorpus": 79.3 }, { - "Model": "bge-large-zh-noinstruct", - "Cmnli": 82.18, - "Ocnli": 71.37 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "SprintDuplicateQuestions": 88.14, + "TwitterSemEval2015": 66.6, + "TwitterURLCorpus": 79.3 } ] }, "Reranking": { "map": [ { - "Model": "bge-large-zh-noinstruct", - "CMedQAv1": 81.72, - "CMedQAv2": 84.64, - "MMarcoReranking": 27.1, - "T2Reranking": 66.16 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "AskUbuntuDupQuestions": 57.16, + "MindSmallReranking": 30.1, + "SciDocsRR": 76.28, + "StackOverflowDupQuestions": 48.82 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-large-zh-noinstruct", - "CmedqaRetrieval": 41.03, - "CovidRetrieval": 75.07, - "DuRetrieval": 84.68, - "EcomRetrieval": 65.6, - "MMarcoRetrieval": 81.38, - "MedicalRetrieval": 58.28, - "T2Retrieval": 84.39, - "VideoRetrieval": 73.93 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "ArguAna": 51.73, + "CQADupstackRetrieval": 32.4, + "ClimateFEVER": 23.58, + "DBPedia": 26.78, + "FEVER": 53.42, + "FiQA2018": 28.56, + "HotpotQA": 52.37, + "MSMARCO": 17.47, + "NFCorpus": 26.28, + "NQ": 37.65, + "QuoraRetrieval": 84.64, + "SCIDOCS": 10.39, + "SciFact": 66.36, + "TRECCOVID": 63.34, + "Touche2020": 12.82 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bge-large-zh-noinstruct", - "AFQMC": 43.06, - "ATEC": 48.29, - "BQ": 60.53, - "LCQMC": 74.71, - "PAWSX": 16.64, - "QBQTC": 35.2, - "STS22 (zh)": 67.19, - "STSB": 78.41 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "BIOSSES": 84.67, + "SICK-R": 72.16, + "STS12": 61.6, + "STS13": 79.71, + "STS14": 72.11, + "STS15": 82.18, + "STS16": 79.41, + "STS17": 85.44, + "STS22": 63.9, + "STSBenchmark": 77.44 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "SummEval": 31.45 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -2428,383 +1874,118 @@ "p-MRR": [] } }, - "gtr-t5-xl": { + "LLM2Vec-Mistral-supervised": { "BitextMining": { - "f1": [ - { - "Model": "gtr-t5-xl", - "BUCC (de-en)": 90.99, - "BUCC (fr-en)": 88.55, - "BUCC (ru-en)": 2.07, - "BUCC (zh-en)": 1.49, - "Tatoeba (afr-eng)": 33.47, - "Tatoeba (amh-eng)": 0.01, - "Tatoeba (ang-eng)": 30.74, - "Tatoeba (ara-eng)": 0.47, - "Tatoeba (arq-eng)": 0.34, - "Tatoeba (arz-eng)": 0.14, - "Tatoeba (ast-eng)": 51.74, - "Tatoeba (awa-eng)": 0.49, - "Tatoeba (aze-eng)": 7.43, - "Tatoeba (bel-eng)": 3.45, - "Tatoeba (ben-eng)": 0.06, - "Tatoeba (ber-eng)": 5.79, - "Tatoeba (bos-eng)": 17.43, - "Tatoeba (bre-eng)": 5.69, - "Tatoeba (bul-eng)": 7.55, - "Tatoeba (cat-eng)": 48.06, - "Tatoeba (cbk-eng)": 54.56, - "Tatoeba (ceb-eng)": 8.72, - "Tatoeba (ces-eng)": 8.76, - "Tatoeba (cha-eng)": 27.56, - "Tatoeba (cmn-eng)": 2.26, - "Tatoeba (cor-eng)": 3.69, - "Tatoeba (csb-eng)": 13.18, - "Tatoeba (cym-eng)": 6.97, - "Tatoeba (dan-eng)": 47.36, - "Tatoeba (deu-eng)": 91.54, - "Tatoeba (dsb-eng)": 13.2, - "Tatoeba (dtp-eng)": 4.54, - "Tatoeba (ell-eng)": 0.55, - "Tatoeba (epo-eng)": 27.86, - "Tatoeba (est-eng)": 5.13, - "Tatoeba (eus-eng)": 10.23, - "Tatoeba (fao-eng)": 21.44, - "Tatoeba (fin-eng)": 6.62, - "Tatoeba (fra-eng)": 79.66, - "Tatoeba (fry-eng)": 32.92, - "Tatoeba (gla-eng)": 2.87, - "Tatoeba (gle-eng)": 3.26, - "Tatoeba (glg-eng)": 63.81, - "Tatoeba (gsw-eng)": 29.71, - "Tatoeba (heb-eng)": 0.33, - "Tatoeba (hin-eng)": 0.25, - "Tatoeba (hrv-eng)": 17.16, - "Tatoeba (hsb-eng)": 12.02, - "Tatoeba (hun-eng)": 7.21, - "Tatoeba (hye-eng)": 0.78, - "Tatoeba (ido-eng)": 40.83, - "Tatoeba (ile-eng)": 54.95, - "Tatoeba (ina-eng)": 72.28, - "Tatoeba (ind-eng)": 30.95, - "Tatoeba (isl-eng)": 11.29, - "Tatoeba (ita-eng)": 73.83, - "Tatoeba (jav-eng)": 8.66, - "Tatoeba (jpn-eng)": 0.61, - "Tatoeba (kab-eng)": 1.78, - "Tatoeba (kat-eng)": 0.79, - "Tatoeba (kaz-eng)": 0.95, - "Tatoeba (khm-eng)": 0.49, - "Tatoeba (kor-eng)": 1.87, - "Tatoeba (kur-eng)": 10.91, - "Tatoeba (kzj-eng)": 5.72, - "Tatoeba (lat-eng)": 18.24, - "Tatoeba (lfn-eng)": 43.49, - "Tatoeba (lit-eng)": 7.13, - "Tatoeba (lvs-eng)": 7.04, - "Tatoeba (mal-eng)": 0.44, - "Tatoeba (mar-eng)": 0.03, - "Tatoeba (max-eng)": 18.99, - "Tatoeba (mhr-eng)": 1.11, - "Tatoeba (mkd-eng)": 2.49, - "Tatoeba (mon-eng)": 2.01, - "Tatoeba (nds-eng)": 39.96, - "Tatoeba (nld-eng)": 58.86, - "Tatoeba (nno-eng)": 29.07, - "Tatoeba (nob-eng)": 40.25, - "Tatoeba (nov-eng)": 50.19, - "Tatoeba (oci-eng)": 30.72, - "Tatoeba (orv-eng)": 0.85, - "Tatoeba (pam-eng)": 7.21, - "Tatoeba (pes-eng)": 0.53, - "Tatoeba (pms-eng)": 31.07, - "Tatoeba (pol-eng)": 18.06, - "Tatoeba (por-eng)": 81.92, - "Tatoeba (ron-eng)": 62.6, - "Tatoeba (rus-eng)": 22.24, - "Tatoeba (slk-eng)": 10.59, - "Tatoeba (slv-eng)": 11.4, - "Tatoeba (spa-eng)": 85.78, - "Tatoeba (sqi-eng)": 14.92, - "Tatoeba (srp-eng)": 9.87, - "Tatoeba (swe-eng)": 55.08, - "Tatoeba (swg-eng)": 32.66, - "Tatoeba (swh-eng)": 7.64, - "Tatoeba (tam-eng)": 0.49, - "Tatoeba (tat-eng)": 1.28, - "Tatoeba (tel-eng)": 0.45, - "Tatoeba (tgl-eng)": 23.63, - "Tatoeba (tha-eng)": 0.61, - "Tatoeba (tuk-eng)": 5.71, - "Tatoeba (tur-eng)": 8.25, - "Tatoeba (tzl-eng)": 28.4, - "Tatoeba (uig-eng)": 0.57, - "Tatoeba (ukr-eng)": 5.69, - "Tatoeba (urd-eng)": 0.0, - "Tatoeba (uzb-eng)": 4.19, - "Tatoeba (vie-eng)": 9.07, - "Tatoeba (war-eng)": 12.31, - "Tatoeba (wuu-eng)": 1.38, - "Tatoeba (xho-eng)": 7.6, - "Tatoeba (yid-eng)": 0.41, - "Tatoeba (yue-eng)": 1.31, - "Tatoeba (zsm-eng)": 29.74 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "gtr-t5-xl", - "AmazonCounterfactualClassification (de)": 59.79, - "AmazonCounterfactualClassification (en)": 68.6, - "AmazonCounterfactualClassification (en-ext)": 69.03, - "AmazonCounterfactualClassification (ja)": 50.59, - "AmazonPolarityClassification": 74.58, - "AmazonReviewsClassification (de)": 35.06, - "AmazonReviewsClassification (en)": 38.2, - "AmazonReviewsClassification (es)": 37.18, - "AmazonReviewsClassification (fr)": 35.48, - "AmazonReviewsClassification (ja)": 22.24, - "AmazonReviewsClassification (zh)": 21.89, - "Banking77Classification": 82.22, - "EmotionClassification": 45.54, - "ImdbClassification": 68.15, - "MTOPDomainClassification (de)": 85.42, - "MTOPDomainClassification (en)": 93.6, - "MTOPDomainClassification (es)": 88.2, - "MTOPDomainClassification (fr)": 85.05, - "MTOPDomainClassification (hi)": 21.74, - "MTOPDomainClassification (th)": 15.87, - "MTOPIntentClassification (de)": 55.75, - "MTOPIntentClassification (en)": 65.93, - "MTOPIntentClassification (es)": 57.73, - "MTOPIntentClassification (fr)": 51.07, - "MTOPIntentClassification (hi)": 3.19, - "MTOPIntentClassification (th)": 5.55, - "MassiveIntentClassification (af)": 42.6, - "MassiveIntentClassification (am)": 2.12, - "MassiveIntentClassification (ar)": 4.64, - "MassiveIntentClassification (az)": 35.05, - "MassiveIntentClassification (bn)": 2.84, - "MassiveIntentClassification (cy)": 36.19, - "MassiveIntentClassification (da)": 48.42, - "MassiveIntentClassification (de)": 55.49, - "MassiveIntentClassification (el)": 10.14, - "MassiveIntentClassification (en)": 70.23, - "MassiveIntentClassification (es)": 56.72, - "MassiveIntentClassification (fa)": 3.54, - "MassiveIntentClassification (fi)": 37.13, - "MassiveIntentClassification (fr)": 57.67, - "MassiveIntentClassification (he)": 2.56, - "MassiveIntentClassification (hi)": 3.24, - "MassiveIntentClassification (hu)": 34.22, - "MassiveIntentClassification (hy)": 3.01, - "MassiveIntentClassification (id)": 46.54, - "MassiveIntentClassification (is)": 34.77, - "MassiveIntentClassification (it)": 54.13, - "MassiveIntentClassification (ja)": 4.27, - "MassiveIntentClassification (jv)": 36.97, - "MassiveIntentClassification (ka)": 2.72, - "MassiveIntentClassification (km)": 5.35, - "MassiveIntentClassification (kn)": 3.17, - "MassiveIntentClassification (ko)": 2.64, - "MassiveIntentClassification (lv)": 36.32, - "MassiveIntentClassification (ml)": 3.18, - "MassiveIntentClassification (mn)": 22.85, - "MassiveIntentClassification (ms)": 42.87, - "MassiveIntentClassification (my)": 4.04, - "MassiveIntentClassification (nb)": 45.87, - "MassiveIntentClassification (nl)": 49.53, - "MassiveIntentClassification (pl)": 42.64, - "MassiveIntentClassification (pt)": 57.03, - "MassiveIntentClassification (ro)": 49.95, - "MassiveIntentClassification (ru)": 36.58, - "MassiveIntentClassification (sl)": 39.44, - "MassiveIntentClassification (sq)": 41.78, - "MassiveIntentClassification (sv)": 47.95, - "MassiveIntentClassification (sw)": 35.85, - "MassiveIntentClassification (ta)": 2.32, - "MassiveIntentClassification (te)": 2.2, - "MassiveIntentClassification (th)": 3.74, - "MassiveIntentClassification (tl)": 43.12, - "MassiveIntentClassification (tr)": 35.24, - "MassiveIntentClassification (ur)": 3.0, - "MassiveIntentClassification (vi)": 30.01, - "MassiveIntentClassification (zh-CN)": 1.72, - "MassiveIntentClassification (zh-TW)": 3.35, - "MassiveScenarioClassification (af)": 52.54, - "MassiveScenarioClassification (am)": 6.3, - "MassiveScenarioClassification (ar)": 11.96, - "MassiveScenarioClassification (az)": 40.17, - "MassiveScenarioClassification (bn)": 8.29, - "MassiveScenarioClassification (cy)": 42.24, - "MassiveScenarioClassification (da)": 57.28, - "MassiveScenarioClassification (de)": 68.09, - "MassiveScenarioClassification (el)": 16.66, - "MassiveScenarioClassification (en)": 75.94, - "MassiveScenarioClassification (es)": 64.32, - "MassiveScenarioClassification (fa)": 6.9, - "MassiveScenarioClassification (fi)": 43.96, - "MassiveScenarioClassification (fr)": 66.72, - "MassiveScenarioClassification (he)": 7.51, - "MassiveScenarioClassification (hi)": 7.82, - "MassiveScenarioClassification (hu)": 42.16, - "MassiveScenarioClassification (hy)": 9.33, - "MassiveScenarioClassification (id)": 53.54, - "MassiveScenarioClassification (is)": 42.84, - "MassiveScenarioClassification (it)": 62.44, - "MassiveScenarioClassification (ja)": 7.29, - "MassiveScenarioClassification (jv)": 43.13, - "MassiveScenarioClassification (ka)": 7.63, - "MassiveScenarioClassification (km)": 9.08, - "MassiveScenarioClassification (kn)": 8.1, - "MassiveScenarioClassification (ko)": 6.35, - "MassiveScenarioClassification (lv)": 40.24, - "MassiveScenarioClassification (ml)": 7.65, - "MassiveScenarioClassification (mn)": 27.98, - "MassiveScenarioClassification (ms)": 52.41, - "MassiveScenarioClassification (my)": 9.21, - "MassiveScenarioClassification (nb)": 54.44, - "MassiveScenarioClassification (nl)": 60.35, - "MassiveScenarioClassification (pl)": 49.97, - "MassiveScenarioClassification (pt)": 62.78, - "MassiveScenarioClassification (ro)": 59.62, - "MassiveScenarioClassification (ru)": 43.44, - "MassiveScenarioClassification (sl)": 44.79, - "MassiveScenarioClassification (sq)": 50.84, - "MassiveScenarioClassification (sv)": 58.21, - "MassiveScenarioClassification (sw)": 44.63, - "MassiveScenarioClassification (ta)": 7.95, - "MassiveScenarioClassification (te)": 7.5, - "MassiveScenarioClassification (th)": 8.79, - "MassiveScenarioClassification (tl)": 53.54, - "MassiveScenarioClassification (tr)": 42.47, - "MassiveScenarioClassification (ur)": 9.58, - "MassiveScenarioClassification (vi)": 34.68, - "MassiveScenarioClassification (zh-CN)": 5.21, - "MassiveScenarioClassification (zh-TW)": 8.77, - "ToxicConversationsClassification": 67.56, - "TweetSentimentExtractionClassification": 54.77 + "Model": "LLM2Vec-Mistral-supervised", + "AmazonCounterfactualClassification": 77.58, + "AmazonPolarityClassification": 91.12, + "AmazonReviewsClassification": 49.97, + "Banking77Classification": 88.31, + "EmotionClassification": 52.04, + "ImdbClassification": 87.42, + "MTOPDomainClassification": 96.04, + "MTOPIntentClassification": 84.77, + "MassiveIntentClassification": 79.29, + "MassiveScenarioClassification": 81.64, + "ToxicConversationsClassification": 69.26, + "TweetSentimentExtractionClassification": 62.14 } ] }, "Clustering": { "v_measure": [ { - "Model": "gtr-t5-xl", - "ArxivClusteringP2P": 37.9, - "ArxivClusteringS2S": 30.45, - "BiorxivClusteringP2P": 30.52, - "BiorxivClusteringS2S": 26.06, - "MedrxivClusteringP2P": 28.69, - "MedrxivClusteringS2S": 26.69, - "RedditClustering": 61.34, - "RedditClusteringP2P": 61.11, - "StackExchangeClustering": 69.95, - "StackExchangeClusteringP2P": 32.73, - "TwentyNewsgroupsClustering": 51.15 + "Model": "LLM2Vec-Mistral-supervised", + "ArxivClusteringP2P": 42.81, + "ArxivClusteringS2S": 44.24, + "BiorxivClusteringP2P": 34.27, + "BiorxivClusteringS2S": 35.53, + "MedrxivClusteringP2P": 31.07, + "MedrxivClusteringS2S": 31.27, + "RedditClustering": 60.24, + "RedditClusteringP2P": 64.12, + "StackExchangeClustering": 70.73, + "StackExchangeClusteringP2P": 34.5, + "TwentyNewsgroupsClustering": 52.18 } ] }, "PairClassification": { "max_ap": [ { - "Model": "gtr-t5-xl", - "SprintDuplicateQuestions": 95.45, - "TwitterSemEval2015": 77.81, - "TwitterURLCorpus": 85.14 + "Model": "LLM2Vec-Mistral-supervised", + "SprintDuplicateQuestions": 96.82, + "TwitterSemEval2015": 80.6, + "TwitterURLCorpus": 86.56 }, { - "Model": "gtr-t5-xl", - "SprintDuplicateQuestions": 95.45, - "TwitterSemEval2015": 77.81, - "TwitterURLCorpus": 85.14 + "Model": "LLM2Vec-Mistral-supervised", + "SprintDuplicateQuestions": 96.82, + "TwitterSemEval2015": 80.6, + "TwitterURLCorpus": 86.56 } ] }, "Reranking": { "map": [ { - "Model": "gtr-t5-xl", - "AskUbuntuDupQuestions": 63.08, + "Model": "LLM2Vec-Mistral-supervised", + "AskUbuntuDupQuestions": 63.98, "MindSmallReranking": 31.5, - "SciDocsRR": 76.49, - "StackOverflowDupQuestions": 52.79 + "SciDocsRR": 83.8, + "StackOverflowDupQuestions": 54.41 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "gtr-t5-xl", - "ArguAna": 52.81, - "CQADupstackRetrieval": 37.35, - "ClimateFEVER": 27.01, - "DBPedia": 39.74, - "FEVER": 72.18, - "FiQA2018": 44.19, - "HotpotQA": 58.91, - "MSMARCO": 43.52, - "NFCorpus": 33.34, - "NQ": 56.16, - "QuoraRetrieval": 88.91, - "SCIDOCS": 15.71, - "SciFact": 64.2, - "TRECCOVID": 60.09, - "Touche2020": 25.26 + "Model": "LLM2Vec-Mistral-supervised", + "ArguAna": 57.48, + "CQADupstackRetrieval": 48.84, + "ClimateFEVER": 35.19, + "DBPedia": 49.58, + "FEVER": 89.4, + "FiQA2018": 53.11, + "HotpotQA": 74.07, + "MSMARCO": 42.17, + "NFCorpus": 39.33, + "NQ": 61.7, + "QuoraRetrieval": 87.75, + "SCIDOCS": 22.5, + "SciFact": 78.86, + "TRECCOVID": 77.69, + "Touche2020": 22.18 } ] }, "STS": { "cosine_spearman": [ { - "Model": "gtr-t5-xl", - "BIOSSES": 78.94, - "SICK-R": 73.63, - "STS12": 69.11, - "STS13": 81.82, - "STS14": 77.07, - "STS15": 86.01, - "STS16": 82.23, - "STS17 (ar-ar)": 9.06, - "STS17 (en-ar)": -3.22, - "STS17 (en-de)": 70.38, - "STS17 (en-en)": 84.9, - "STS17 (en-tr)": 17.17, - "STS17 (es-en)": 60.24, - "STS17 (es-es)": 81.93, - "STS17 (fr-en)": 62.17, - "STS17 (it-en)": 59.11, - "STS17 (ko-ko)": 8.9, - "STS17 (nl-en)": 56.91, - "STS22 (ar)": 37.66, - "STS22 (de)": 50.58, - "STS22 (de-en)": 53.63, - "STS22 (de-fr)": 55.72, - "STS22 (de-pl)": 27.99, - "STS22 (en)": 66.61, - "STS22 (es)": 59.14, - "STS22 (es-en)": 69.99, - "STS22 (es-it)": 60.94, - "STS22 (fr)": 79.43, - "STS22 (fr-pl)": 61.98, - "STS22 (it)": 67.14, - "STS22 (pl)": 33.74, - "STS22 (pl-en)": 60.18, - "STS22 (ru)": 32.69, - "STS22 (tr)": 55.79, - "STS22 (zh)": 31.16, - "STS22 (zh-en)": 28.85, - "STSBenchmark": 77.65 + "Model": "LLM2Vec-Mistral-supervised", + "BIOSSES": 85.24, + "SICK-R": 83.7, + "STS12": 78.8, + "STS13": 86.37, + "STS14": 84.04, + "STS15": 88.99, + "STS16": 87.22, + "STS17": 90.19, + "STS22": 67.68, + "STSBenchmark": 88.65 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "gtr-t5-xl", - "SummEval": 30.21 + "Model": "LLM2Vec-Mistral-supervised", + "SummEval": 29.96 } ] }, @@ -2815,87 +1996,118 @@ "p-MRR": [] } }, - "voyage-2": { + "LLM2Vec-Mistral-unsupervised": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "voyage-2", - "AmazonReviewsClassification (fr)": 37.26, - "MTOPDomainClassification (fr)": 79.79, - "MTOPIntentClassification (fr)": 45.62, - "MasakhaNEWSClassification (fra)": 80.19, - "MassiveIntentClassification (fr)": 53.7, - "MassiveScenarioClassification (fr)": 62.46 + "Model": "LLM2Vec-Mistral-unsupervised", + "AmazonCounterfactualClassification": 76.94, + "AmazonPolarityClassification": 85.29, + "AmazonReviewsClassification": 47.09, + "Banking77Classification": 86.16, + "EmotionClassification": 48.88, + "ImdbClassification": 77.95, + "MTOPDomainClassification": 95.48, + "MTOPIntentClassification": 82.84, + "MassiveIntentClassification": 76.65, + "MassiveScenarioClassification": 79.99, + "ToxicConversationsClassification": 70.71, + "TweetSentimentExtractionClassification": 60.9 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-2", - "AlloProfClusteringP2P": 57.96, - "AlloProfClusteringS2S": 41.65, - "HALClusteringS2S": 24.84, - "MLSUMClusteringP2P": 45.08, - "MLSUMClusteringS2S": 38.77, - "MasakhaNEWSClusteringP2P (fra)": 48.54, - "MasakhaNEWSClusteringS2S (fra)": 36.33 + "Model": "LLM2Vec-Mistral-unsupervised", + "ArxivClusteringP2P": 47.56, + "ArxivClusteringS2S": 39.92, + "BiorxivClusteringP2P": 36.14, + "BiorxivClusteringS2S": 30.26, + "MedrxivClusteringP2P": 30.11, + "MedrxivClusteringS2S": 26.93, + "RedditClustering": 41.83, + "RedditClusteringP2P": 62.08, + "StackExchangeClustering": 67.34, + "StackExchangeClusteringP2P": 34.5, + "TwentyNewsgroupsClustering": 30.26 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-2", - "OpusparcusPC (fr)": 89.76, - "PawsXPairClassification (fr)": 58.96 + "Model": "LLM2Vec-Mistral-unsupervised", + "SprintDuplicateQuestions": 91.3, + "TwitterSemEval2015": 68.76, + "TwitterURLCorpus": 82.76 }, { - "Model": "voyage-2", - "OpusparcusPC (fr)": 89.83, - "PawsXPairClassification (fr)": 58.97 + "Model": "LLM2Vec-Mistral-unsupervised", + "SprintDuplicateQuestions": 91.3, + "TwitterSemEval2015": 68.76, + "TwitterURLCorpus": 82.76 } ] }, "Reranking": { "map": [ { - "Model": "voyage-2", - "AlloprofReranking": 63.54, - "SyntecReranking": 82.65 + "Model": "LLM2Vec-Mistral-unsupervised", + "AskUbuntuDupQuestions": 58.6, + "MindSmallReranking": 29.73, + "SciDocsRR": 77.81, + "StackOverflowDupQuestions": 49.8 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-2", - "AlloprofRetrieval": 45.5, - "BSARDRetrieval": 0.15, - "MintakaRetrieval (fr)": 15.51, - "SyntecRetrieval": 75.83, - "XPQARetrieval (fr)": 67.07 + "Model": "LLM2Vec-Mistral-unsupervised", + "ArguAna": 51.0, + "CQADupstackRetrieval": 33.37, + "ClimateFEVER": 22.97, + "DBPedia": 25.48, + "FEVER": 45.11, + "FiQA2018": 27.24, + "HotpotQA": 54.54, + "MSMARCO": 19.13, + "NFCorpus": 27.16, + "NQ": 34.16, + "QuoraRetrieval": 84.4, + "SCIDOCS": 15.35, + "SciFact": 68.68, + "TRECCOVID": 55.67, + "Touche2020": 6.54 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-2", - "SICKFr": 68.51, - "STS22 (fr)": 70.51, - "STSBenchmarkMultilingualSTS (fr)": 76.43 + "Model": "LLM2Vec-Mistral-unsupervised", + "BIOSSES": 83.29, + "SICK-R": 75.55, + "STS12": 67.65, + "STS13": 83.9, + "STS14": 76.97, + "STS15": 83.8, + "STS16": 81.91, + "STS17": 85.58, + "STS22": 65.93, + "STSBenchmark": 80.42 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "voyage-2", - "SummEvalFr": 30.88 + "Model": "LLM2Vec-Mistral-unsupervised", + "SummEval": 30.19 } ] }, @@ -2906,332 +2118,243 @@ "p-MRR": [] } }, - "sbert_large_mt_nlu_ru": { + "LLM2Vec-Sheared-Llama-supervised": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "sbert_large_mt_nlu_ru", - "GeoreviewClassification (rus-Cyrl)": 39.67, - "HeadlineClassification (rus-Cyrl)": 77.19, - "InappropriatenessClassification (rus-Cyrl)": 64.64, - "KinopoiskClassification (rus-Cyrl)": 50.33, - "MassiveIntentClassification (rus-Cyrl)": 61.42, - "MassiveScenarioClassification (rus-Cyrl)": 68.13, - "RuReviewsClassification (rus-Cyrl)": 58.29, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 54.19, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.8 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "AmazonCounterfactualClassification": 77.42, + "AmazonPolarityClassification": 82.05, + "AmazonReviewsClassification": 40.81, + "Banking77Classification": 86.01, + "EmotionClassification": 48.38, + "ImdbClassification": 75.33, + "MTOPDomainClassification": 94.09, + "MTOPIntentClassification": 77.05, + "MassiveIntentClassification": 75.58, + "MassiveScenarioClassification": 79.16, + "ToxicConversationsClassification": 69.92, + "TweetSentimentExtractionClassification": 60.76 } ] }, "Clustering": { "v_measure": [ { - "Model": "sbert_large_mt_nlu_ru", - "GeoreviewClusteringP2P (rus-Cyrl)": 57.07, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 51.44, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 45.36 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "ArxivClusteringP2P": 43.47, + "ArxivClusteringS2S": 39.85, + "BiorxivClusteringP2P": 37.1, + "BiorxivClusteringS2S": 34.28, + "MedrxivClusteringP2P": 33.55, + "MedrxivClusteringS2S": 31.11, + "RedditClustering": 53.02, + "RedditClusteringP2P": 60.47, + "StackExchangeClustering": 63.04, + "StackExchangeClusteringP2P": 34.01, + "TwentyNewsgroupsClustering": 49.37 } ] }, "PairClassification": { "max_ap": [ { - "Model": "sbert_large_mt_nlu_ru", - "TERRa (rus-Cyrl)": 51.97 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "SprintDuplicateQuestions": 96.25, + "TwitterSemEval2015": 76.14, + "TwitterURLCorpus": 86.23 }, { - "Model": "sbert_large_mt_nlu_ru", - "TERRa (rus-Cyrl)": 51.97 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "SprintDuplicateQuestions": 96.25, + "TwitterSemEval2015": 76.14, + "TwitterURLCorpus": 86.23 } ] }, "Reranking": { "map": [ { - "Model": "sbert_large_mt_nlu_ru", - "MIRACLReranking (rus-Cyrl)": 24.99 - }, - { - "Model": "sbert_large_mt_nlu_ru", - "RuBQReranking (rus-Cyrl)": 56.14 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "AskUbuntuDupQuestions": 60.71, + "MindSmallReranking": 31.96, + "SciDocsRR": 79.23, + "StackOverflowDupQuestions": 49.61 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sbert_large_mt_nlu_ru", - "MIRACLRetrieval (rus-Cyrl)": 6.2, - "RiaNewsRetrieval (rus-Cyrl)": 21.4, - "RuBQRetrieval (rus-Cyrl)": 29.8 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "ArguAna": 51.66, + "CQADupstackRetrieval": 41.73, + "ClimateFEVER": 33.49, + "DBPedia": 43.58, + "FEVER": 86.81, + "FiQA2018": 41.0, + "HotpotQA": 63.85, + "MSMARCO": 38.32, + "NFCorpus": 37.12, + "NQ": 53.89, + "QuoraRetrieval": 87.37, + "SCIDOCS": 17.96, + "SciFact": 72.08, + "TRECCOVID": 80.41, + "Touche2020": 22.31 } ] }, "STS": { "cosine_spearman": [ { - "Model": "sbert_large_mt_nlu_ru", - "RUParaPhraserSTS (rus-Cyrl)": 65.17, - "RuSTSBenchmarkSTS (rus-Cyrl)": 71.22, - "STS22 (rus-Cyrl)": 56.82 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "BIOSSES": 85.88, + "SICK-R": 82.25, + "STS12": 78.28, + "STS13": 85.52, + "STS14": 82.49, + "STS15": 88.76, + "STS16": 87.11, + "STS17": 90.1, + "STS22": 68.25, + "STSBenchmark": 87.16 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [ + "cosine_spearman": [ { - "Model": "sbert_large_mt_nlu_ru", - "CEDRClassification (rus-Cyrl)": 36.81, - "SensitiveTopicsClassification (rus-Cyrl)": 28.54 + "Model": "LLM2Vec-Sheared-Llama-supervised", + "SummEval": 30.01 } ] }, + "MultilabelClassification": { + "accuracy": [] + }, "InstructionRetrieval": { "p-MRR": [] } }, - "gbert-base": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [ - { - "Model": "gbert-base", - "BlurbsClusteringP2P": 35.36, - "BlurbsClusteringS2S": 11.27, - "TenKGnadClusteringP2P": 37.16, - "TenKGnadClusteringS2S": 24.23 - } - ] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "nb-bert-large": { - "BitextMining": { - "f1": [ - { - "Model": "nb-bert-large", - "BornholmBitextMining": 4.53 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "nb-bert-large", - "AngryTweetsClassification": 52.14, - "DKHateClassification": 62.13, - "DanishPoliticalCommentsClassification": 35.04, - "LccSentimentClassification": 56.27, - "MassiveIntentClassification (da)": 57.03, - "MassiveIntentClassification (nb)": 62.68, - "MassiveIntentClassification (sv)": 55.02, - "MassiveScenarioClassification (da)": 60.43, - "MassiveScenarioClassification (nb)": 67.44, - "MassiveScenarioClassification (sv)": 57.12, - "NoRecClassification": 55.46, - "NordicLangClassification": 85.27, - "NorwegianParliament": 62.58, - "ScalaDaClassification": 62.85, - "ScalaNbClassification": 66.97 - } - ] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "OpenSearch-text-hybrid": { + "LLM2Vec-Sheared-Llama-unsupervised": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "OpenSearch-text-hybrid", - "AmazonReviewsClassification (zh)": 46.18, - "IFlyTek": 51.8, - "JDReview": 86.02, - "MassiveIntentClassification (zh-CN)": 73.85, - "MassiveScenarioClassification (zh-CN)": 77.13, - "MultilingualSentiment": 76.35, - "OnlineShopping": 93.2, - "TNews": 53.06, - "Waimai": 88.1 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "AmazonCounterfactualClassification": 72.93, + "AmazonPolarityClassification": 74.28, + "AmazonReviewsClassification": 36.14, + "Banking77Classification": 79.0, + "EmotionClassification": 42.85, + "ImdbClassification": 71.92, + "MTOPDomainClassification": 91.24, + "MTOPIntentClassification": 74.08, + "MassiveIntentClassification": 69.99, + "MassiveScenarioClassification": 75.15, + "ToxicConversationsClassification": 68.4, + "TweetSentimentExtractionClassification": 56.08 } ] }, "Clustering": { "v_measure": [ { - "Model": "OpenSearch-text-hybrid", - "CLSClusteringP2P": 41.64, - "CLSClusteringS2S": 40.33, - "ThuNewsClusteringP2P": 69.28, - "ThuNewsClusteringS2S": 63.75 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "ArxivClusteringP2P": 42.92, + "ArxivClusteringS2S": 35.2, + "BiorxivClusteringP2P": 35.02, + "BiorxivClusteringS2S": 27.21, + "MedrxivClusteringP2P": 30.15, + "MedrxivClusteringS2S": 26.96, + "RedditClustering": 38.67, + "RedditClusteringP2P": 53.42, + "StackExchangeClustering": 59.35, + "StackExchangeClusteringP2P": 31.47, + "TwentyNewsgroupsClustering": 31.54 } ] }, "PairClassification": { "max_ap": [ { - "Model": "OpenSearch-text-hybrid", - "Cmnli": 90.77, - "Ocnli": 85.44 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "SprintDuplicateQuestions": 77.36, + "TwitterSemEval2015": 61.54, + "TwitterURLCorpus": 77.73 }, { - "Model": "OpenSearch-text-hybrid", - "Cmnli": 90.77, - "Ocnli": 85.44 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "SprintDuplicateQuestions": 77.36, + "TwitterSemEval2015": 61.54, + "TwitterURLCorpus": 77.73 } ] }, "Reranking": { "map": [ { - "Model": "OpenSearch-text-hybrid", - "CMedQAv1": 88.99, - "CMedQAv2": 89.6, - "MMarcoReranking": 28.12, - "T2Reranking": 66.38 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "AskUbuntuDupQuestions": 52.7, + "MindSmallReranking": 29.52, + "SciDocsRR": 67.76, + "StackOverflowDupQuestions": 40.82 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "OpenSearch-text-hybrid", - "CmedqaRetrieval": 46.56, - "CovidRetrieval": 84.03, - "DuRetrieval": 87.85, - "EcomRetrieval": 68.79, - "MMarcoRetrieval": 79.93, - "MedicalRetrieval": 65.92, - "T2Retrieval": 86.76, - "VideoRetrieval": 75.43 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "ArguAna": 43.64, + "CQADupstackRetrieval": 18.5, + "ClimateFEVER": 18.95, + "DBPedia": 13.21, + "FEVER": 16.96, + "FiQA2018": 16.99, + "HotpotQA": 22.64, + "MSMARCO": 7.03, + "NFCorpus": 15.73, + "NQ": 17.96, + "QuoraRetrieval": 78.23, + "SCIDOCS": 5.53, + "SciFact": 38.31, + "TRECCOVID": 56.04, + "Touche2020": 19.17 } ] }, "STS": { "cosine_spearman": [ { - "Model": "OpenSearch-text-hybrid", - "AFQMC": 59.11, - "ATEC": 58.19, - "BQ": 71.07, - "LCQMC": 78.27, - "PAWSX": 44.98, - "QBQTC": 38.69, - "STS22 (zh)": 66.53, - "STSB": 82.8 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "BIOSSES": 75.12, + "SICK-R": 69.34, + "STS12": 60.09, + "STS13": 72.52, + "STS14": 66.7, + "STS15": 77.69, + "STS16": 75.94, + "STS17": 81.67, + "STS22": 63.7, + "STSBenchmark": 73.36 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "e5-mistral-7b-instruct-noinstruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "cosine_spearman": [ { - "Model": "e5-mistral-7b-instruct-noinstruct", - "ARCChallenge": 20.48, - "AlphaNLI": 18.88, - "HellaSwag": 32.25, - "PIQA": 32.8, - "Quail": 6.25, - "RARbCode": 79.84, - "RARbMath": 76.19, - "SIQA": 5.08, - "SpartQA": 10.87, - "TempReasonL1": 3.04, - "TempReasonL2Fact": 35.63, - "TempReasonL2Pure": 9.32, - "TempReasonL3Fact": 30.41, - "TempReasonL3Pure": 14.39, - "WinoGrande": 45.18 + "Model": "LLM2Vec-Sheared-Llama-unsupervised", + "SummEval": 31.23 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, @@ -3239,261 +2362,914 @@ "p-MRR": [] } }, - "dfm-encoder-large-v1": { + "LaBSE": { "BitextMining": { "f1": [ { - "Model": "dfm-encoder-large-v1", - "BornholmBitextMining": 11.65 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "dfm-encoder-large-v1", - "AngryTweetsClassification": 53.8, - "DKHateClassification": 60.09, - "DanishPoliticalCommentsClassification": 36.6, - "LccSentimentClassification": 57.33, - "MassiveIntentClassification (da)": 60.55, - "MassiveIntentClassification (nb)": 52.49, - "MassiveIntentClassification (sv)": 49.74, - "MassiveScenarioClassification (da)": 64.16, - "MassiveScenarioClassification (nb)": 54.59, - "MassiveScenarioClassification (sv)": 50.1, - "NoRecClassification": 48.3, - "NordicLangClassification": 77.68, - "NorwegianParliament": 58.78, - "ScalaDaClassification": 63.08, - "ScalaNbClassification": 58.95 + "Model": "LaBSE", + "BUCC": 99.16, + "BornholmBitextMining": 45.63, + "Tatoeba (ber-eng)": 8.4, + "Tatoeba (hye-eng)": 94.09, + "Tatoeba (ces-eng)": 96.68, + "Tatoeba (slk-eng)": 96.5, + "Tatoeba (cat-eng)": 95.38, + "Tatoeba (awa-eng)": 71.7, + "Tatoeba (lat-eng)": 80.07, + "Tatoeba (hsb-eng)": 67.11, + "Tatoeba (swh-eng)": 84.5, + "Tatoeba (ind-eng)": 93.66, + "Tatoeba (xho-eng)": 91.55, + "Tatoeba (nno-eng)": 94.48, + "Tatoeba (csb-eng)": 52.57, + "Tatoeba (kzj-eng)": 11.33, + "Tatoeba (isl-eng)": 94.75, + "Tatoeba (nld-eng)": 96.07, + "Tatoeba (ell-eng)": 95.35, + "Tatoeba (por-eng)": 94.14, + "Tatoeba (ita-eng)": 92.72, + "Tatoeba (vie-eng)": 97.2, + "Tatoeba (uzb-eng)": 84.23, + "Tatoeba (bul-eng)": 94.58, + "Tatoeba (tgl-eng)": 96.02, + "Tatoeba (gla-eng)": 85.66, + "Tatoeba (mkd-eng)": 93.6, + "Tatoeba (tel-eng)": 97.86, + "Tatoeba (kaz-eng)": 87.49, + "Tatoeba (zsm-eng)": 95.62, + "Tatoeba (hin-eng)": 96.87, + "Tatoeba (ile-eng)": 85.58, + "Tatoeba (jpn-eng)": 95.38, + "Tatoeba (orv-eng)": 38.93, + "Tatoeba (kab-eng)": 4.31, + "Tatoeba (yue-eng)": 89.58, + "Tatoeba (bel-eng)": 95.0, + "Tatoeba (dtp-eng)": 10.85, + "Tatoeba (ron-eng)": 96.92, + "Tatoeba (arz-eng)": 76.0, + "Tatoeba (tzl-eng)": 58.88, + "Tatoeba (slv-eng)": 96.03, + "Tatoeba (jav-eng)": 79.77, + "Tatoeba (pol-eng)": 97.22, + "Tatoeba (heb-eng)": 91.53, + "Tatoeba (ang-eng)": 59.28, + "Tatoeba (ara-eng)": 88.8, + "Tatoeba (tuk-eng)": 75.27, + "Tatoeba (afr-eng)": 96.18, + "Tatoeba (kor-eng)": 90.95, + "Tatoeba (mhr-eng)": 15.74, + "Tatoeba (fry-eng)": 89.31, + "Tatoeba (urd-eng)": 93.22, + "Tatoeba (srp-eng)": 94.43, + "Tatoeba (cbk-eng)": 79.44, + "Tatoeba (tat-eng)": 85.92, + "Tatoeba (wuu-eng)": 90.18, + "Tatoeba (war-eng)": 60.29, + "Tatoeba (kat-eng)": 95.02, + "Tatoeba (nds-eng)": 79.42, + "Tatoeba (gle-eng)": 93.8, + "Tatoeba (mal-eng)": 98.45, + "Tatoeba (tha-eng)": 96.14, + "Tatoeba (fin-eng)": 96.37, + "Tatoeba (cor-eng)": 10.11, + "Tatoeba (tur-eng)": 98.0, + "Tatoeba (yid-eng)": 88.79, + "Tatoeba (eus-eng)": 95.01, + "Tatoeba (cmn-eng)": 95.1, + "Tatoeba (fao-eng)": 87.4, + "Tatoeba (lfn-eng)": 67.54, + "Tatoeba (bos-eng)": 94.92, + "Tatoeba (arq-eng)": 42.69, + "Tatoeba (sqi-eng)": 96.76, + "Tatoeba (ben-eng)": 88.55, + "Tatoeba (pes-eng)": 94.7, + "Tatoeba (fra-eng)": 94.86, + "Tatoeba (deu-eng)": 99.2, + "Tatoeba (spa-eng)": 98.4, + "Tatoeba (oci-eng)": 65.81, + "Tatoeba (ina-eng)": 95.37, + "Tatoeba (hrv-eng)": 96.95, + "Tatoeba (gsw-eng)": 46.5, + "Tatoeba (swe-eng)": 95.63, + "Tatoeba (bre-eng)": 15.07, + "Tatoeba (hun-eng)": 96.55, + "Tatoeba (uig-eng)": 92.4, + "Tatoeba (mar-eng)": 92.65, + "Tatoeba (nob-eng)": 98.4, + "Tatoeba (rus-eng)": 93.75, + "Tatoeba (ceb-eng)": 64.42, + "Tatoeba (aze-eng)": 94.93, + "Tatoeba (dsb-eng)": 64.81, + "Tatoeba (tam-eng)": 89.0, + "Tatoeba (est-eng)": 96.55, + "Tatoeba (cym-eng)": 92.0, + "Tatoeba (amh-eng)": 91.47, + "Tatoeba (dan-eng)": 95.71, + "Tatoeba (epo-eng)": 98.2, + "Tatoeba (ast-eng)": 90.68, + "Tatoeba (swg-eng)": 59.36, + "Tatoeba (pms-eng)": 64.57, + "Tatoeba (kur-eng)": 83.59, + "Tatoeba (khm-eng)": 78.37, + "Tatoeba (ido-eng)": 89.42, + "Tatoeba (ukr-eng)": 93.97, + "Tatoeba (mon-eng)": 95.91, + "Tatoeba (nov-eng)": 74.38, + "Tatoeba (cha-eng)": 31.77, + "Tatoeba (pam-eng)": 10.73, + "Tatoeba (lvs-eng)": 95.88, + "Tatoeba (max-eng)": 63.26, + "Tatoeba (lit-eng)": 96.47, + "Tatoeba (glg-eng)": 96.82, + "Tatoeba": 95.62 } ] }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "text-embedding-3-large-256": { - "BitextMining": { - "f1": [] - }, "Classification": { "accuracy": [ { - "Model": "text-embedding-3-large-256", - "AmazonCounterfactualClassification (en)": 73.96, - "AmazonPolarityClassification": 91.32, - "AmazonReviewsClassification (en)": 46.03, - "Banking77Classification": 83.19, - "EmotionClassification": 45.8, - "ImdbClassification": 85.93, - "MTOPDomainClassification (en)": 92.76, - "MTOPIntentClassification (en)": 70.45, - "MassiveIntentClassification (en)": 71.12, - "MassiveScenarioClassification (en)": 75.56, - "ToxicConversationsClassification": 68.52, - "TweetSentimentExtractionClassification": 58.98 + "Model": "LaBSE", + "AllegroReviews": 34.89, + "AmazonCounterfactualClassification (en-ext)": 76.12, + "AmazonCounterfactualClassification (en)": 75.9, + "AmazonCounterfactualClassification (de)": 73.17, + "AmazonCounterfactualClassification (ja)": 76.4, + "AmazonCounterfactualClassification": 76.42, + "AmazonPolarityClassification": 68.95, + "AmazonReviewsClassification (en)": 35.81, + "AmazonReviewsClassification (de)": 39.93, + "AmazonReviewsClassification (es)": 39.39, + "AmazonReviewsClassification (fr)": 38.53, + "AmazonReviewsClassification (ja)": 36.45, + "AmazonReviewsClassification (zh)": 36.45, + "AmazonReviewsClassification": 36.45, + "AngryTweetsClassification": 51.11, + "Banking77Classification": 69.85, + "CBD": 65.71, + "DanishPoliticalCommentsClassification": 38.34, + "EmotionClassification": 37.22, + "GeoreviewClassification": 40.86, + "HeadlineClassification": 68.75, + "IFlyTek": 43.19, + "ImdbClassification": 62.04, + "InappropriatenessClassification": 58.52, + "JDReview": 79.14, + "KinopoiskClassification": 46.77, + "LccSentimentClassification": 50.07, + "MTOPDomainClassification (en)": 86.08, + "MTOPDomainClassification (de)": 86.93, + "MTOPDomainClassification (es)": 84.06, + "MTOPDomainClassification (fr)": 84.14, + "MTOPDomainClassification (hi)": 85.11, + "MTOPDomainClassification (th)": 81.24, + "MTOPDomainClassification": 81.24, + "MTOPIntentClassification (en)": 63.07, + "MTOPIntentClassification (de)": 63.46, + "MTOPIntentClassification (es)": 64.46, + "MTOPIntentClassification (fr)": 62.05, + "MTOPIntentClassification (hi)": 62.61, + "MTOPIntentClassification (th)": 64.7, + "MTOPIntentClassification": 64.61, + "MasakhaNEWSClassification (amh)": 81.78, + "MasakhaNEWSClassification (eng)": 77.77, + "MasakhaNEWSClassification (fra)": 72.09, + "MasakhaNEWSClassification (hau)": 73.12, + "MasakhaNEWSClassification (ibo)": 69.1, + "MasakhaNEWSClassification (lin)": 74.63, + "MasakhaNEWSClassification (lug)": 57.44, + "MasakhaNEWSClassification (orm)": 51.6, + "MasakhaNEWSClassification (pcm)": 91.44, + "MasakhaNEWSClassification (run)": 73.76, + "MasakhaNEWSClassification (sna)": 87.18, + "MasakhaNEWSClassification (som)": 60.03, + "MasakhaNEWSClassification (swa)": 69.33, + "MasakhaNEWSClassification (tir)": 61.73, + "MasakhaNEWSClassification (xho)": 77.34, + "MasakhaNEWSClassification (yor)": 77.13, + "MasakhaNEWSClassification": 77.39, + "MassiveIntentClassification (ms)": 58.59, + "MassiveIntentClassification (sl)": 59.39, + "MassiveIntentClassification (hu)": 59.52, + "MassiveIntentClassification (sv)": 59.64, + "MassiveIntentClassification (nl)": 59.31, + "MassiveIntentClassification (id)": 61.14, + "MassiveIntentClassification (vi)": 56.68, + "MassiveIntentClassification (zh-CN)": 63.85, + "MassiveIntentClassification (am)": 55.67, + "MassiveIntentClassification (ko)": 60.93, + "MassiveIntentClassification (el)": 56.96, + "MassiveIntentClassification (nb)": 57.81, + "MassiveIntentClassification (jv)": 50.94, + "MassiveIntentClassification (fa)": 62.33, + "MassiveIntentClassification (ja)": 63.13, + "MassiveIntentClassification (it)": 59.75, + "MassiveIntentClassification (es)": 58.26, + "MassiveIntentClassification (my)": 57.23, + "MassiveIntentClassification (fi)": 60.09, + "MassiveIntentClassification (he)": 56.42, + "MassiveIntentClassification (is)": 54.83, + "MassiveIntentClassification (tl)": 55.1, + "MassiveIntentClassification (kn)": 56.2, + "MassiveIntentClassification (ur)": 56.68, + "MassiveIntentClassification (ml)": 57.75, + "MassiveIntentClassification (tr)": 60.91, + "MassiveIntentClassification (en)": 61.44, + "MassiveIntentClassification (sw)": 51.5, + "MassiveIntentClassification (km)": 48.46, + "MassiveIntentClassification (ar)": 50.78, + "MassiveIntentClassification (ru)": 60.64, + "MassiveIntentClassification (te)": 58.33, + "MassiveIntentClassification (ta)": 54.95, + "MassiveIntentClassification (pt)": 60.15, + "MassiveIntentClassification (zh-TW)": 59.47, + "MassiveIntentClassification (pl)": 59.75, + "MassiveIntentClassification (az)": 58.91, + "MassiveIntentClassification (ro)": 57.84, + "MassiveIntentClassification (ka)": 48.26, + "MassiveIntentClassification (sq)": 58.08, + "MassiveIntentClassification (bn)": 58.14, + "MassiveIntentClassification (th)": 56.45, + "MassiveIntentClassification (da)": 58.23, + "MassiveIntentClassification (de)": 56.16, + "MassiveIntentClassification (hy)": 56.22, + "MassiveIntentClassification (mn)": 58.46, + "MassiveIntentClassification (cy)": 50.17, + "MassiveIntentClassification (af)": 56.07, + "MassiveIntentClassification (fr)": 60.42, + "MassiveIntentClassification (hi)": 59.37, + "MassiveIntentClassification (lv)": 57.06, + "MassiveIntentClassification": 59.51, + "MassiveScenarioClassification (te)": 64.12, + "MassiveScenarioClassification (de)": 62.4, + "MassiveScenarioClassification (el)": 64.57, + "MassiveScenarioClassification (ml)": 62.24, + "MassiveScenarioClassification (nl)": 65.17, + "MassiveScenarioClassification (jv)": 58.29, + "MassiveScenarioClassification (ro)": 62.39, + "MassiveScenarioClassification (pt)": 63.28, + "MassiveScenarioClassification (pl)": 64.55, + "MassiveScenarioClassification (sl)": 64.26, + "MassiveScenarioClassification (ta)": 59.07, + "MassiveScenarioClassification (sw)": 58.37, + "MassiveScenarioClassification (tl)": 60.23, + "MassiveScenarioClassification (id)": 65.86, + "MassiveScenarioClassification (ru)": 65.23, + "MassiveScenarioClassification (sv)": 65.99, + "MassiveScenarioClassification (ar)": 57.74, + "MassiveScenarioClassification (fr)": 65.1, + "MassiveScenarioClassification (th)": 64.32, + "MassiveScenarioClassification (hy)": 61.29, + "MassiveScenarioClassification (nb)": 64.3, + "MassiveScenarioClassification (cy)": 56.12, + "MassiveScenarioClassification (my)": 62.94, + "MassiveScenarioClassification (bn)": 61.86, + "MassiveScenarioClassification (it)": 64.09, + "MassiveScenarioClassification (ko)": 67.3, + "MassiveScenarioClassification (kn)": 61.74, + "MassiveScenarioClassification (he)": 63.52, + "MassiveScenarioClassification (am)": 62.02, + "MassiveScenarioClassification (ka)": 53.37, + "MassiveScenarioClassification (vi)": 61.06, + "MassiveScenarioClassification (ur)": 61.47, + "MassiveScenarioClassification (zh-TW)": 67.05, + "MassiveScenarioClassification (en)": 66.44, + "MassiveScenarioClassification (ms)": 65.62, + "MassiveScenarioClassification (is)": 61.93, + "MassiveScenarioClassification (km)": 56.2, + "MassiveScenarioClassification (hu)": 65.82, + "MassiveScenarioClassification (fi)": 64.56, + "MassiveScenarioClassification (sq)": 64.56, + "MassiveScenarioClassification (fa)": 67.43, + "MassiveScenarioClassification (af)": 63.38, + "MassiveScenarioClassification (mn)": 62.62, + "MassiveScenarioClassification (da)": 65.26, + "MassiveScenarioClassification (az)": 63.47, + "MassiveScenarioClassification (es)": 63.61, + "MassiveScenarioClassification (ja)": 67.7, + "MassiveScenarioClassification (tr)": 65.43, + "MassiveScenarioClassification (lv)": 61.86, + "MassiveScenarioClassification (zh-CN)": 70.84, + "MassiveScenarioClassification (hi)": 64.41, + "MassiveScenarioClassification": 67.08, + "MultilingualSentiment": 64.6, + "NoRecClassification": 45.45, + "NordicLangClassification": 35.39, + "OnlineShopping": 85.63, + "PAC": 68.11, + "PolEmo2.0-IN": 64.0, + "PolEmo2.0-OUT": 44.72, + "RuReviewsClassification": 58.01, + "RuSciBenchGRNTIClassification": 53.04, + "RuSciBenchOECDClassification": 40.48, + "TNews": 46.02, + "ToxicConversationsClassification": 66.9, + "TweetSentimentExtractionClassification": 58.82, + "Waimai": 82.85 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-embedding-3-large-256", - "ArxivClusteringP2P": 47.05, - "ArxivClusteringS2S": 42.59, - "BiorxivClusteringP2P": 35.43, - "BiorxivClusteringS2S": 33.86, - "MedrxivClusteringP2P": 32.1, - "MedrxivClusteringS2S": 31.15, - "RedditClustering": 60.18, - "RedditClusteringP2P": 64.71, - "StackExchangeClustering": 71.23, - "StackExchangeClusteringP2P": 35.95, - "TwentyNewsgroupsClustering": 54.24 + "Model": "LaBSE", + "8TagsClustering": 12.96, + "AlloProfClusteringP2P": 54.78, + "AlloProfClusteringS2S": 31.6, + "ArxivClusteringP2P": 32.13, + "ArxivClusteringS2S": 22.05, + "BiorxivClusteringP2P": 29.84, + "BiorxivClusteringS2S": 20.57, + "GeoreviewClusteringP2P": 52.19, + "HALClusteringS2S": 20.62, + "MLSUMClusteringP2P (ru)": 39.45, + "MLSUMClusteringP2P": 42.09, + "MLSUMClusteringS2S (ru)": 35.77, + "MLSUMClusteringS2S": 34.84, + "MasakhaNEWSClusteringP2P (amh)": 67.78, + "MasakhaNEWSClusteringP2P (eng)": 48.16, + "MasakhaNEWSClusteringP2P (fra)": 46.16, + "MasakhaNEWSClusteringP2P (hau)": 39.77, + "MasakhaNEWSClusteringP2P (ibo)": 62.67, + "MasakhaNEWSClusteringP2P (lin)": 62.98, + "MasakhaNEWSClusteringP2P (lug)": 47.76, + "MasakhaNEWSClusteringP2P (orm)": 28.76, + "MasakhaNEWSClusteringP2P (pcm)": 77.16, + "MasakhaNEWSClusteringP2P (run)": 60.36, + "MasakhaNEWSClusteringP2P (sna)": 63.57, + "MasakhaNEWSClusteringP2P (som)": 34.94, + "MasakhaNEWSClusteringP2P (swa)": 27.26, + "MasakhaNEWSClusteringP2P (tir)": 51.59, + "MasakhaNEWSClusteringP2P (xho)": 45.32, + "MasakhaNEWSClusteringP2P (yor)": 48.73, + "MasakhaNEWSClusteringP2P": 46.16, + "MasakhaNEWSClusteringS2S (amh)": 52.73, + "MasakhaNEWSClusteringS2S (eng)": 32.6, + "MasakhaNEWSClusteringS2S (fra)": 38.13, + "MasakhaNEWSClusteringS2S (hau)": 31.62, + "MasakhaNEWSClusteringS2S (ibo)": 32.27, + "MasakhaNEWSClusteringS2S (lin)": 49.38, + "MasakhaNEWSClusteringS2S (lug)": 47.63, + "MasakhaNEWSClusteringS2S (orm)": 25.05, + "MasakhaNEWSClusteringS2S (pcm)": 68.18, + "MasakhaNEWSClusteringS2S (run)": 52.39, + "MasakhaNEWSClusteringS2S (sna)": 46.9, + "MasakhaNEWSClusteringS2S (som)": 24.08, + "MasakhaNEWSClusteringS2S (swa)": 15.83, + "MasakhaNEWSClusteringS2S (tir)": 49.07, + "MasakhaNEWSClusteringS2S (xho)": 28.52, + "MasakhaNEWSClusteringS2S (yor)": 32.26, + "MasakhaNEWSClusteringS2S": 38.13, + "MedrxivClusteringP2P": 30.13, + "MedrxivClusteringS2S": 24.82, + "RedditClustering": 28.79, + "RedditClusteringP2P": 49.14, + "RuSciBenchGRNTIClusteringP2P": 49.07, + "RuSciBenchOECDClusteringP2P": 41.97, + "StackExchangeClustering": 35.43, + "StackExchangeClusteringP2P": 28.83, + "TwentyNewsgroupsClustering": 23.28 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-embedding-3-large-256", - "SprintDuplicateQuestions": 89.02, - "TwitterSemEval2015": 76.56, - "TwitterURLCorpus": 87.09 + "Model": "LaBSE", + "CDSC-E": 68.92, + "OpusparcusPC (de)": 96.58, + "OpusparcusPC (en)": 98.12, + "OpusparcusPC (fi)": 94.44, + "OpusparcusPC (fr)": 93.96, + "OpusparcusPC (ru)": 87.3, + "OpusparcusPC (sv)": 93.69, + "PSC": 97.42, + "PawsXPairClassification (de)": 51.07, + "PawsXPairClassification (en)": 54.07, + "PawsXPairClassification (es)": 52.19, + "PawsXPairClassification (fr)": 54.63, + "PawsXPairClassification (ja)": 47.56, + "PawsXPairClassification (ko)": 49.39, + "PawsXPairClassification (zh)": 54.26, + "SICK-E-PL": 63.77, + "SprintDuplicateQuestions": 89.26, + "TERRa": 55.71, + "TwitterSemEval2015": 62.78, + "TwitterURLCorpus": 84.58 + }, + { + "Model": "LaBSE", + "CDSC-E": 68.92, + "OpusparcusPC (de)": 96.58, + "OpusparcusPC (en)": 98.12, + "OpusparcusPC (fi)": 94.44, + "OpusparcusPC (fr)": 93.96, + "OpusparcusPC (ru)": 87.3, + "OpusparcusPC (sv)": 93.69, + "OpusparcusPC": 93.96, + "PPC": 86.97, + "PSC": 97.42, + "PawsXPairClassification (de)": 51.45, + "PawsXPairClassification (en)": 54.07, + "PawsXPairClassification (es)": 52.19, + "PawsXPairClassification (fr)": 54.7, + "PawsXPairClassification (ja)": 47.74, + "PawsXPairClassification (ko)": 49.42, + "PawsXPairClassification (zh)": 54.55, + "PawsXPairClassification": 54.69, + "SICK-E-PL": 63.77, + "SprintDuplicateQuestions": 89.26, + "TERRa": 55.71, + "TwitterSemEval2015": 62.78, + "TwitterURLCorpus": 84.58 + }, + { + "Model": "LaBSE", + "CDSC-E": 68.91, + "OpusparcusPC": 93.96, + "PPC": 86.97, + "PSC": 97.42, + "PawsXPairClassification": 54.63, + "SICK-E-PL": 63.77, + "SprintDuplicateQuestions": 89.26, + "TwitterSemEval2015": 62.78, + "TwitterURLCorpus": 84.58 } ] }, "Reranking": { "map": [ { - "Model": "text-embedding-3-large-256", - "AskUbuntuDupQuestions": 64.61, - "MindSmallReranking": 29.63, - "SciDocsRR": 84.25, - "StackOverflowDupQuestions": 53.46 + "Model": "LaBSE", + "AlloprofReranking": 49.51, + "AskUbuntuDupQuestions": 52.75, + "MMarcoReranking": 14.83, + "MindSmallReranking": 29.81, + "RuBQReranking": 55.13, + "SciDocsRR": 68.72, + "StackOverflowDupQuestions": 42.42, + "SyntecReranking": 73.28, + "T2Reranking": 63.29 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-3-large-256", - "ArguAna": 55.6, - "CQADupstackRetrieval": 42.28, - "ClimateFEVER": 25.8, - "DBPedia": 40.8, - "FEVER": 84.57, - "FiQA2018": 50.33, - "HotpotQA": 62.69, - "MSMARCO": 37.93, - "NFCorpus": 37.94, - "NQ": 56.64, - "QuoraRetrieval": 88.22, - "SCIDOCS": 20.44, - "SciFact": 73.1, - "TRECCOVID": 76.24, - "Touche2020": 22.31 + "Model": "LaBSE", + "AILACasedocs": 17.67, + "AILAStatutes": 16.72, + "ARCChallenge": 3.78, + "AlloprofRetrieval": 19.77, + "AlphaNLI": 13.11, + "AppsRetrieval": 2.39, + "ArguAna": 34.18, + "ArguAna-PL": 38.52, + "BSARDRetrieval": 0.0, + "CQADupstackRetrieval": 18.75, + "ClimateFEVER": 3.83, + "CmedqaRetrieval": 5.49, + "CodeFeedbackMT": 17.98, + "CodeFeedbackST": 35.98, + "CodeSearchNetCCRetrieval (python)": 40.41, + "CodeSearchNetCCRetrieval (javascript)": 52.0, + "CodeSearchNetCCRetrieval (go)": 41.73, + "CodeSearchNetCCRetrieval (ruby)": 41.46, + "CodeSearchNetCCRetrieval (java)": 45.83, + "CodeSearchNetCCRetrieval (php)": 31.96, + "CodeSearchNetRetrieval (python)": 60.52, + "CodeSearchNetRetrieval (javascript)": 46.24, + "CodeSearchNetRetrieval (go)": 48.56, + "CodeSearchNetRetrieval (ruby)": 51.34, + "CodeSearchNetRetrieval (java)": 35.05, + "CodeSearchNetRetrieval (php)": 46.03, + "CodeTransOceanContest": 28.1, + "CodeTransOceanDL": 26.45, + "CosQA": 8.77, + "CovidRetrieval": 28.6, + "DBPedia": 15.57, + "DBPedia-PL": 16.1, + "DuRetrieval": 26.34, + "EcomRetrieval": 25.42, + "FEVER": 12.18, + "FiQA-PL": 7.63, + "FiQA2018": 7.0, + "GerDaLIRSmall": 4.59, + "HellaSwag": 5.59, + "HotpotQA": 18.75, + "HotpotQA-PL": 19.72, + "LEMBNarrativeQARetrieval": 11.45, + "LEMBQMSumRetrieval": 14.07, + "LEMBSummScreenFDRetrieval": 40.52, + "LEMBWikimQARetrieval": 28.1, + "LeCaRDv2": 24.68, + "LegalBenchConsumerContractsQA": 54.66, + "LegalBenchCorporateLobbying": 69.39, + "LegalQuAD": 16.64, + "LegalSummarization": 53.89, + "MMarcoRetrieval": 34.78, + "MSMARCO": 7.6, + "MSMARCO-PL": 7.22, + "MedicalRetrieval": 6.68, + "MintakaRetrieval (ar)": 14.06, + "MintakaRetrieval (de)": 15.26, + "MintakaRetrieval (es)": 15.65, + "MintakaRetrieval (fr)": 15.53, + "MintakaRetrieval (hi)": 13.67, + "MintakaRetrieval (it)": 15.94, + "MintakaRetrieval (ja)": 12.8, + "MintakaRetrieval (pt)": 15.03, + "MintakaRetrieval": 15.53, + "NFCorpus": 16.54, + "NFCorpus-PL": 17.45, + "NQ": 8.42, + "NQ-PL": 9.65, + "PIQA": 6.53, + "Quail": 1.91, + "Quora-PL": 74.96, + "QuoraRetrieval": 77.03, + "RARbCode": 2.31, + "RARbMath": 27.19, + "RiaNewsRetrieval": 42.75, + "RuBQRetrieval": 30.02, + "SCIDOCS": 5.63, + "SCIDOCS-PL": 7.48, + "SIQA": 1.07, + "SciFact": 38.2, + "SciFact-PL": 39.79, + "SpartQA": 1.56, + "StackOverflowQA": 38.23, + "SyntecRetrieval": 55.31, + "SyntheticText2SQL": 43.28, + "T2Retrieval": 25.32, + "TRECCOVID": 16.34, + "TRECCOVID-PL": 18.45, + "TempReasonL1": 1.56, + "TempReasonL2Fact": 7.06, + "TempReasonL2Pure": 0.14, + "TempReasonL3Fact": 8.74, + "TempReasonL3Pure": 4.73, + "Touche2020": 4.88, + "VideoRetrieval": 22.04, + "WinoGrande": 54.3, + "XPQARetrieval (ara-ara)": 35.19, + "XPQARetrieval (eng-ara)": 20.64, + "XPQARetrieval (ara-eng)": 32.47, + "XPQARetrieval (deu-deu)": 53.56, + "XPQARetrieval (eng-deu)": 24.31, + "XPQARetrieval (deu-eng)": 54.87, + "XPQARetrieval (spa-spa)": 44.49, + "XPQARetrieval (eng-spa)": 25.31, + "XPQARetrieval (spa-eng)": 43.4, + "XPQARetrieval (fra-fra)": 51.74, + "XPQARetrieval (eng-fra)": 21.29, + "XPQARetrieval (fra-eng)": 49.4, + "XPQARetrieval (hin-hin)": 66.64, + "XPQARetrieval (eng-hin)": 23.25, + "XPQARetrieval (hin-eng)": 64.54, + "XPQARetrieval (ita-ita)": 56.27, + "XPQARetrieval (eng-ita)": 25.8, + "XPQARetrieval (ita-eng)": 52.69, + "XPQARetrieval (jpn-jpn)": 58.6, + "XPQARetrieval (eng-jpn)": 21.49, + "XPQARetrieval (jpn-eng)": 52.41, + "XPQARetrieval (kor-kor)": 27.63, + "XPQARetrieval (eng-kor)": 23.33, + "XPQARetrieval (kor-eng)": 23.97, + "XPQARetrieval (pol-pol)": 37.33, + "XPQARetrieval (eng-pol)": 16.19, + "XPQARetrieval (pol-eng)": 37.7, + "XPQARetrieval (por-por)": 38.48, + "XPQARetrieval (eng-por)": 19.41, + "XPQARetrieval (por-eng)": 37.31, + "XPQARetrieval (tam-tam)": 37.33, + "XPQARetrieval (eng-tam)": 20.53, + "XPQARetrieval (tam-eng)": 30.14, + "XPQARetrieval (cmn-cmn)": 50.7, + "XPQARetrieval (eng-cmn)": 20.59, + "XPQARetrieval (cmn-eng)": 48.23, + "XPQARetrieval": 51.74 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text-embedding-3-large-256", - "BIOSSES": 84.87, - "SICK-R": 79.18, - "STS12": 71.98, - "STS13": 85.52, - "STS14": 80.5, - "STS15": 87.51, - "STS16": 84.48, - "STS17 (en-en)": 88.11, - "STS22 (en)": 65.92, - "STSBenchmark": 82.34 + "Model": "LaBSE", + "AFQMC": 21.02, + "ATEC": 26.61, + "BIOSSES": 78.7, + "BQ": 42.6, + "CDSC-R": 85.53, + "LCQMC": 52.19, + "PAWSX": 10.23, + "RUParaPhraserSTS": 65.74, + "RuSTSBenchmarkSTS": 73.34, + "SICK-R": 69.99, + "SICK-R-PL": 65.9, + "SICKFr": 69.94, + "STS12": 65.08, + "STS13": 67.98, + "STS14": 64.03, + "STS15": 76.59, + "STS16": 72.98, + "STS17 (es-en)": 65.71, + "STS17 (en-de)": 73.85, + "STS17 (fr-en)": 76.98, + "STS17 (en-tr)": 72.07, + "STS17 (ko-ko)": 71.32, + "STS17 (es-es)": 80.83, + "STS17 (it-en)": 76.99, + "STS17 (ar-ar)": 69.07, + "STS17 (en-en)": 79.45, + "STS17 (nl-en)": 75.22, + "STS17 (en-ar)": 74.51, + "STS22 (pl-en)": 69.41, + "STS22 (de-en)": 50.14, + "STS22 (es)": 63.18, + "STS22 (de-pl)": 58.69, + "STS22 (fr)": 77.95, + "STS22 (fr-pl)": 61.98, + "STS22 (de)": 48.58, + "STS22 (pl)": 39.3, + "STS22 (en)": 61.63, + "STS22 (es-it)": 69.69, + "STS22 (zh-en)": 64.02, + "STS22 (de-fr)": 53.28, + "STS22 (tr)": 58.15, + "STS22 (it)": 72.22, + "STS22 (ru)": 57.49, + "STS22 (es-en)": 71.86, + "STS22 (ar)": 57.67, + "STS22 (zh)": 63.02, + "STSB": 68.38, + "STSBenchmark": 72.25, + "STSBenchmarkMultilingualSTS (nl)": 70.22, + "STSBenchmarkMultilingualSTS (de)": 72.43, + "STSBenchmarkMultilingualSTS (pt)": 71.65, + "STSBenchmarkMultilingualSTS (fr)": 75.1, + "STSBenchmarkMultilingualSTS (en)": 72.25, + "STSBenchmarkMultilingualSTS (pl)": 72.58, + "STSBenchmarkMultilingualSTS (es)": 72.92, + "STSBenchmarkMultilingualSTS (zh)": 69.5, + "STSBenchmarkMultilingualSTS (ru)": 73.06, + "STSBenchmarkMultilingualSTS (it)": 72.97 + }, + { + "Model": "LaBSE", + "BIOSSES": 78.7, + "CDSC-R": 85.53, + "SICK-R": 69.99, + "SICK-R-PL": 65.9, + "SICKFr": 69.94, + "STS12": 65.08, + "STS13": 67.98, + "STS14": 64.03, + "STS15": 76.59, + "STS16": 72.98, + "STS17": 75.22, + "STS22": 64.02, + "STSBenchmark": 72.25, + "STSBenchmarkMultilingualSTS": 75.1 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "text-embedding-3-large-256", - "SummEval": 29.92 + "Model": "LaBSE", + "SummEval": 31.05, + "SummEvalFr": 30.16 + }, + { + "Model": "LaBSE", + "SummEval": 31.05, + "SummEvalFr": 30.16 + }, + { + "Model": "LaBSE", + "SummEval": 31.05, + "SummEvalFr": 30.16 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "LaBSE", + "CEDRClassification": 40.61, + "SensitiveTopicsClassification": 22.23 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "LaBSE", + "Core17InstructionRetrieval": 1.49, + "News21InstructionRetrieval": -1.11, + "Robust04InstructionRetrieval": -9.37 + } + ] } }, - "bge-large-zh-v1.5": { + "LaBSE-en-ru": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "LaBSE-en-ru", + "Tatoeba (rus-eng)": 93.62 + } + ] }, "Classification": { "accuracy": [ { - "Model": "bge-large-zh-v1.5", - "AmazonReviewsClassification (zh)": 41.38, - "IFlyTek": 48.74, - "JDReview": 85.14, - "MassiveIntentClassification (zh-CN)": 68.84, - "MassiveScenarioClassification (zh-CN)": 74.7, - "MultilingualSentiment": 72.97, - "OnlineShopping": 91.43, - "TNews": 52.1, - "Waimai": 86.9 + "Model": "LaBSE-en-ru", + "GeoreviewClassification": 40.89, + "HeadlineClassification": 68.75, + "InappropriatenessClassification": 58.48, + "KinopoiskClassification": 49.85, + "MassiveIntentClassification (sw)": 19.98, + "MassiveIntentClassification (az)": 19.52, + "MassiveIntentClassification (tr)": 24.12, + "MassiveIntentClassification (zh-CN)": 3.96, + "MassiveIntentClassification (am)": 2.76, + "MassiveIntentClassification (kn)": 2.86, + "MassiveIntentClassification (hi)": 3.29, + "MassiveIntentClassification (tl)": 27.08, + "MassiveIntentClassification (th)": 4.0, + "MassiveIntentClassification (sv)": 32.01, + "MassiveIntentClassification (de)": 35.14, + "MassiveIntentClassification (es)": 37.67, + "MassiveIntentClassification (pt)": 39.84, + "MassiveIntentClassification (ja)": 4.78, + "MassiveIntentClassification (fi)": 31.11, + "MassiveIntentClassification (ka)": 2.87, + "MassiveIntentClassification (sl)": 35.66, + "MassiveIntentClassification (ru)": 60.53, + "MassiveIntentClassification (it)": 43.32, + "MassiveIntentClassification (te)": 2.72, + "MassiveIntentClassification (af)": 30.59, + "MassiveIntentClassification (is)": 25.61, + "MassiveIntentClassification (fa)": 3.71, + "MassiveIntentClassification (vi)": 23.0, + "MassiveIntentClassification (bn)": 3.35, + "MassiveIntentClassification (hy)": 2.8, + "MassiveIntentClassification (pl)": 31.3, + "MassiveIntentClassification (cy)": 26.59, + "MassiveIntentClassification (jv)": 26.84, + "MassiveIntentClassification (mn)": 35.97, + "MassiveIntentClassification (en)": 60.48, + "MassiveIntentClassification (ms)": 27.82, + "MassiveIntentClassification (nb)": 35.78, + "MassiveIntentClassification (he)": 2.33, + "MassiveIntentClassification (km)": 4.6, + "MassiveIntentClassification (nl)": 34.66, + "MassiveIntentClassification (id)": 33.31, + "MassiveIntentClassification (ml)": 2.63, + "MassiveIntentClassification (ta)": 2.22, + "MassiveIntentClassification (my)": 3.57, + "MassiveIntentClassification (ur)": 3.36, + "MassiveIntentClassification (da)": 38.66, + "MassiveIntentClassification (zh-TW)": 5.29, + "MassiveIntentClassification (ro)": 37.45, + "MassiveIntentClassification (lv)": 23.92, + "MassiveIntentClassification (fr)": 40.29, + "MassiveIntentClassification (el)": 11.14, + "MassiveIntentClassification (sq)": 35.84, + "MassiveIntentClassification (hu)": 26.74, + "MassiveIntentClassification (ko)": 2.69, + "MassiveIntentClassification (ar)": 5.19, + "MassiveScenarioClassification (sw)": 25.61, + "MassiveScenarioClassification (az)": 24.48, + "MassiveScenarioClassification (tr)": 31.38, + "MassiveScenarioClassification (zh-CN)": 9.98, + "MassiveScenarioClassification (am)": 7.59, + "MassiveScenarioClassification (kn)": 8.73, + "MassiveScenarioClassification (hi)": 8.77, + "MassiveScenarioClassification (tl)": 35.12, + "MassiveScenarioClassification (th)": 8.69, + "MassiveScenarioClassification (sv)": 35.83, + "MassiveScenarioClassification (de)": 41.72, + "MassiveScenarioClassification (es)": 43.33, + "MassiveScenarioClassification (pt)": 44.62, + "MassiveScenarioClassification (ja)": 9.51, + "MassiveScenarioClassification (fi)": 33.79, + "MassiveScenarioClassification (ka)": 7.32, + "MassiveScenarioClassification (sl)": 37.6, + "MassiveScenarioClassification (ru)": 65.15, + "MassiveScenarioClassification (it)": 47.28, + "MassiveScenarioClassification (te)": 7.53, + "MassiveScenarioClassification (af)": 37.27, + "MassiveScenarioClassification (is)": 30.32, + "MassiveScenarioClassification (fa)": 6.83, + "MassiveScenarioClassification (vi)": 28.92, + "MassiveScenarioClassification (bn)": 8.57, + "MassiveScenarioClassification (hy)": 8.91, + "MassiveScenarioClassification (pl)": 33.75, + "MassiveScenarioClassification (cy)": 30.38, + "MassiveScenarioClassification (jv)": 33.94, + "MassiveScenarioClassification (mn)": 41.53, + "MassiveScenarioClassification (en)": 65.43, + "MassiveScenarioClassification (ms)": 36.28, + "MassiveScenarioClassification (nb)": 42.43, + "MassiveScenarioClassification (he)": 8.64, + "MassiveScenarioClassification (km)": 9.99, + "MassiveScenarioClassification (nl)": 41.47, + "MassiveScenarioClassification (id)": 39.05, + "MassiveScenarioClassification (ml)": 7.24, + "MassiveScenarioClassification (ta)": 7.71, + "MassiveScenarioClassification (my)": 9.94, + "MassiveScenarioClassification (ur)": 9.16, + "MassiveScenarioClassification (da)": 44.69, + "MassiveScenarioClassification (zh-TW)": 10.48, + "MassiveScenarioClassification (ro)": 44.55, + "MassiveScenarioClassification (lv)": 26.26, + "MassiveScenarioClassification (fr)": 45.08, + "MassiveScenarioClassification (el)": 19.46, + "MassiveScenarioClassification (sq)": 40.9, + "MassiveScenarioClassification (hu)": 33.92, + "MassiveScenarioClassification (ko)": 7.37, + "MassiveScenarioClassification (ar)": 12.43, + "RuReviewsClassification": 58.01, + "RuSciBenchGRNTIClassification": 52.8, + "RuSciBenchOECDClassification": 40.36 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-large-zh-v1.5", - "CLSClusteringP2P": 41.44, - "CLSClusteringS2S": 38.33, - "ThuNewsClusteringP2P": 59.61, - "ThuNewsClusteringS2S": 56.58 + "Model": "LaBSE-en-ru", + "GeoreviewClusteringP2P": 51.89, + "MLSUMClusteringP2P (ru)": 37.87, + "MLSUMClusteringS2S (ru)": 41.24, + "RuSciBenchGRNTIClusteringP2P": 47.48, + "RuSciBenchOECDClusteringP2P": 41.16 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bge-large-zh-v1.5", - "Cmnli": 85.27, - "Ocnli": 77.94 + "Model": "LaBSE-en-ru", + "OpusparcusPC (ru)": 87.18, + "TERRa": 55.61 }, { - "Model": "bge-large-zh-v1.5", - "Cmnli": 85.29, - "Ocnli": 77.96 + "Model": "LaBSE-en-ru", + "OpusparcusPC (ru)": 87.18, + "TERRa": 55.61 } ] }, "Reranking": { "map": [ { - "Model": "bge-large-zh-v1.5", - "CMedQAv1": 83.45, - "CMedQAv2": 85.44, - "MMarcoReranking": 28.74, - "T2Reranking": 65.74 + "Model": "LaBSE-en-ru", + "MIRACLReranking (ru)": 28.86 + }, + { + "Model": "LaBSE-en-ru", + "RuBQReranking": 54.83 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-large-zh-v1.5", - "CmedqaRetrieval": 42.57, - "CovidRetrieval": 73.35, - "DuRetrieval": 86.32, - "EcomRetrieval": 65.33, - "MMarcoRetrieval": 79.23, - "MedicalRetrieval": 59.59, - "T2Retrieval": 83.99, - "VideoRetrieval": 73.32 + "Model": "LaBSE-en-ru", + "MIRACLRetrieval (ru)": 10.58, + "RiaNewsRetrieval": 34.73, + "RuBQRetrieval": 29.03 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bge-large-zh-v1.5", - "AFQMC": 44.36, - "ATEC": 49.54, - "BQ": 62.94, - "LCQMC": 74.33, - "PAWSX": 33.92, - "QBQTC": 37.29, - "STS22 (zh)": 68.94, - "STSB": 78.7 + "Model": "LaBSE-en-ru", + "RUParaPhraserSTS": 65.87, + "RuSTSBenchmarkSTS": 73.32, + "STS22 (de)": 38.9, + "STS22 (en)": 59.47, + "STS22 (pl-en)": 58.73, + "STS22 (es)": 60.85, + "STS22 (fr)": 74.98, + "STS22 (de-en)": 47.98, + "STS22 (de-fr)": 59.4, + "STS22 (de-pl)": 39.48, + "STS22 (pl)": 32.74, + "STS22 (tr)": 55.04, + "STS22 (es-en)": 70.8, + "STS22 (ru)": 58.53, + "STS22 (it)": 68.58, + "STS22 (fr-pl)": 61.98, + "STS22 (es-it)": 66.83, + "STS22 (zh-en)": 24.98, + "STS22 (ar)": 31.85, + "STS22 (zh)": 35.1, + "STSBenchmarkMultilingualSTS (ru)": 73.02 } ] }, @@ -3501,185 +3277,206 @@ "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "LaBSE-en-ru", + "CEDRClassification": 40.75, + "SensitiveTopicsClassification": 21.79 + } + ] }, "InstructionRetrieval": { "p-MRR": [] } }, - "flan-t5-base": { + "LaBSE-ru-turbo": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "LaBSE-ru-turbo", + "Tatoeba (rus-eng)": 93.22 + } + ] }, "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ + "accuracy": [ { - "Model": "flan-t5-base", - "Core17InstructionRetrieval": -3.31, - "News21InstructionRetrieval": -0.12, - "Robust04InstructionRetrieval": 5.35 + "Model": "LaBSE-ru-turbo", + "GeoreviewClassification": 46.04, + "HeadlineClassification": 69.98, + "InappropriatenessClassification": 61.39, + "KinopoiskClassification": 53.59, + "MassiveIntentClassification (ru)": 66.08, + "MassiveScenarioClassification (ru)": 71.13, + "RuReviewsClassification": 64.58, + "RuSciBenchGRNTIClassification": 56.67, + "RuSciBenchOECDClassification": 43.58 } ] - } - }, - "Cohere-embed-english-v3.0-instruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "LaBSE-ru-turbo", + "GeoreviewClusteringP2P": 64.55, + "MLSUMClusteringP2P (ru)": 45.7, + "MLSUMClusteringS2S (ru)": 42.93, + "RuSciBenchGRNTIClusteringP2P": 50.64, + "RuSciBenchOECDClusteringP2P": 44.48 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "LaBSE-ru-turbo", + "OpusparcusPC (ru)": 89.32, + "TERRa": 57.81 + }, + { + "Model": "LaBSE-ru-turbo", + "OpusparcusPC (ru)": 89.32, + "TERRa": 57.81 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "LaBSE-ru-turbo", + "MIRACLReranking (ru)": 57.44 + }, + { + "Model": "LaBSE-ru-turbo", + "RuBQReranking": 68.65 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "Cohere-embed-english-v3.0-instruct", - "ARCChallenge": 10.1, - "AlphaNLI": 18.75, - "HellaSwag": 29.02, - "PIQA": 27.89, - "Quail": 7.77, - "RARbCode": 56.56, - "RARbMath": 72.05, - "SIQA": 5.03, - "SpartQA": 3.33, - "TempReasonL1": 1.43, - "TempReasonL2Fact": 40.46, - "TempReasonL2Pure": 2.39, - "TempReasonL3Fact": 33.87, - "TempReasonL3Pure": 7.52, - "WinoGrande": 65.02 + "Model": "LaBSE-ru-turbo", + "MIRACLRetrieval (ru)": 55.97, + "RiaNewsRetrieval": 69.36, + "RuBQRetrieval": 65.71 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "LaBSE-ru-turbo", + "RUParaPhraserSTS": 72.97, + "RuSTSBenchmarkSTS": 81.77, + "STS22 (ru)": 62.89, + "STSBenchmarkMultilingualSTS (ru)": 81.81 + } + ] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "LaBSE-ru-turbo", + "CEDRClassification": 45.11, + "SensitiveTopicsClassification": 27.52 + } + ] }, "InstructionRetrieval": { "p-MRR": [] } }, - "sentence-croissant-llm-base": { + "OpenSearch-text-hybrid": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "sentence-croissant-llm-base", - "AmazonReviewsClassification (fr)": 34.79, - "MTOPDomainClassification (fr)": 85.52, - "MTOPIntentClassification (fr)": 63.12, - "MasakhaNEWSClassification (fra)": 79.29, - "MassiveIntentClassification (fr)": 59.41, - "MassiveScenarioClassification (fr)": 65.29 + "Model": "OpenSearch-text-hybrid", + "AmazonReviewsClassification": 46.18, + "IFlyTek": 51.8, + "JDReview": 86.02, + "MassiveIntentClassification": 73.85, + "MassiveScenarioClassification": 77.13, + "MultilingualSentiment": 76.35, + "OnlineShopping": 93.2, + "TNews": 53.06, + "Waimai": 88.1 } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-croissant-llm-base", - "AlloProfClusteringP2P": 64.12, - "AlloProfClusteringS2S": 32.52, - "HALClusteringS2S": 23.4, - "MLSUMClusteringP2P": 42.94, - "MLSUMClusteringS2S": 33.91, - "MasakhaNEWSClusteringP2P (fra)": 53.94, - "MasakhaNEWSClusteringS2S (fra)": 41.05 + "Model": "OpenSearch-text-hybrid", + "CLSClusteringP2P": 41.64, + "CLSClusteringS2S": 40.33, + "ThuNewsClusteringP2P": 69.28, + "ThuNewsClusteringS2S": 63.75 } ] }, "PairClassification": { "max_ap": [ { - "Model": "sentence-croissant-llm-base", - "OpusparcusPC (fr)": 91.42, - "PawsXPairClassification (fr)": 63.13 + "Model": "OpenSearch-text-hybrid", + "Cmnli": 90.77, + "Ocnli": 85.44 }, { - "Model": "sentence-croissant-llm-base", - "OpusparcusPC (fr)": 91.42, - "PawsXPairClassification (fr)": 63.19 + "Model": "OpenSearch-text-hybrid", + "Cmnli": 90.77, + "Ocnli": 85.44 } ] }, "Reranking": { "map": [ { - "Model": "sentence-croissant-llm-base", - "AlloprofReranking": 53.0, - "SyntecReranking": 82.9 + "Model": "OpenSearch-text-hybrid", + "CMedQAv1": 88.99, + "CMedQAv2": 89.6, + "MMarcoReranking": 28.12, + "T2Reranking": 66.38 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-croissant-llm-base", - "AlloprofRetrieval": 29.97, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 21.31, - "SyntecRetrieval": 74.2, - "XPQARetrieval (fr)": 58.57 + "Model": "OpenSearch-text-hybrid", + "CmedqaRetrieval": 46.56, + "CovidRetrieval": 84.03, + "DuRetrieval": 87.85, + "EcomRetrieval": 68.79, + "MMarcoRetrieval": 79.93, + "MedicalRetrieval": 65.92, + "T2Retrieval": 86.76, + "VideoRetrieval": 75.43 } ] }, "STS": { "cosine_spearman": [ { - "Model": "sentence-croissant-llm-base", - "SICKFr": 69.6, - "STS22 (fr)": 78.77, - "STSBenchmarkMultilingualSTS (fr)": 79.23 + "Model": "OpenSearch-text-hybrid", + "AFQMC": 59.11, + "ATEC": 58.19, + "BQ": 71.07, + "LCQMC": 78.27, + "PAWSX": 44.98, + "QBQTC": 38.69, + "STS22": 66.53, + "STSB": 82.8 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "sentence-croissant-llm-base", - "SummEvalFr": 29.04 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -3688,36 +3485,12 @@ "p-MRR": [] } }, - "sentence-bert-swedish-cased": { + "SFR-Embedding-Mistral": { "BitextMining": { - "f1": [ - { - "Model": "sentence-bert-swedish-cased", - "BornholmBitextMining": 14.08 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "sentence-bert-swedish-cased", - "AngryTweetsClassification": 44.46, - "DKHateClassification": 59.36, - "DanishPoliticalCommentsClassification": 28.32, - "LccSentimentClassification": 47.2, - "MassiveIntentClassification (da)": 42.84, - "MassiveIntentClassification (nb)": 42.74, - "MassiveIntentClassification (sv)": 69.11, - "MassiveScenarioClassification (da)": 49.64, - "MassiveScenarioClassification (nb)": 49.49, - "MassiveScenarioClassification (sv)": 75.96, - "NoRecClassification": 43.53, - "NordicLangClassification": 51.45, - "NorwegianParliament": 55.74, - "ScalaDaClassification": 50.12, - "ScalaNbClassification": 50.34 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [] @@ -3729,7 +3502,36 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "SFR-Embedding-Mistral", + "BrightRetrieval (sustainable_living)": 19.79, + "BrightRetrieval (economics)": 17.84, + "BrightRetrieval (theoremqa_theorems)": 24.32, + "BrightRetrieval (aops)": 7.43, + "BrightRetrieval (theoremqa_questions)": 23.05, + "BrightRetrieval (psychology)": 18.97, + "BrightRetrieval (stackoverflow)": 12.72, + "BrightRetrieval (pony)": 1.97, + "BrightRetrieval (leetcode)": 27.35, + "BrightRetrieval (biology)": 19.49, + "BrightRetrieval (earth_science)": 26.63, + "BrightRetrieval (robotics)": 16.7 + } + ], + "recall_at_1": [ + { + "Model": "SFR-Embedding-Mistral", + "BrightRetrieval (earth_science)": 37.0, + "BrightRetrieval (biology)": 30.26, + "BrightRetrieval (stackoverflow)": 14.53, + "BrightRetrieval (sustainable_living)": 34.99, + "BrightRetrieval (psychology)": 47.72, + "BrightRetrieval (pony)": 2.0, + "BrightRetrieval (economics)": 24.27, + "BrightRetrieval (robotics)": 17.33 + } + ] }, "STS": { "cosine_spearman": [] @@ -3744,523 +3546,1578 @@ "p-MRR": [] } }, - "text-similarity-babbage-001": { + "USER-base": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "USER-base", + "Tatoeba (rus-eng)": 90.2 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "USER-base", + "GeoreviewClassification": 47.23, + "HeadlineClassification": 74.88, + "InappropriatenessClassification": 61.94, + "KinopoiskClassification": 55.69, + "MassiveIntentClassification (ru)": 65.57, + "MassiveScenarioClassification (ru)": 68.33, + "RuReviewsClassification": 66.44, + "RuSciBenchGRNTIClassification": 55.55, + "RuSciBenchOECDClassification": 43.28 + } + ] }, "Clustering": { "v_measure": [ { - "Model": "text-similarity-babbage-001", - "RedditClustering": 45.64, - "StackExchangeClustering": 53.01, - "TwentyNewsgroupsClustering": 42.01 + "Model": "USER-base", + "GeoreviewClusteringP2P": 64.16, + "MLSUMClusteringP2P (ru)": 48.09, + "MLSUMClusteringS2S (ru)": 45.73, + "RuSciBenchGRNTIClusteringP2P": 51.38, + "RuSciBenchOECDClusteringP2P": 44.73 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-similarity-babbage-001", - "SprintDuplicateQuestions": 76.46, - "TwitterSemEval2015": 70.85, - "TwitterURLCorpus": 85.08 + "Model": "USER-base", + "OpusparcusPC (ru)": 91.65, + "TERRa": 60.02 + }, + { + "Model": "USER-base", + "OpusparcusPC (ru)": 91.65, + "TERRa": 60.11 } ] }, "Reranking": { "map": [ { - "Model": "text-similarity-babbage-001", - "AskUbuntuDupQuestions": 54.68, - "SciDocsRR": 72.78, - "StackOverflowDupQuestions": 40.65 + "Model": "USER-base", + "MIRACLReranking (ru)": 46.75 + }, + { + "Model": "USER-base", + "RuBQReranking": 64.42 } ] }, "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [ + "ndcg_at_10": [ { - "Model": "text-similarity-babbage-001", - "BIOSSES": 78.12, - "SICK-R": 77.02, - "STSBenchmark": 84.32 + "Model": "USER-base", + "MIRACLRetrieval (ru)": 35.22, + "RiaNewsRetrieval": 77.83, + "RuBQRetrieval": 56.86 } ] }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "flan-t5-large": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "USER-base", + "RUParaPhraserSTS": 73.56, + "RuSTSBenchmarkSTS": 82.26, + "STS22 (ru)": 63.39, + "STSBenchmarkMultilingualSTS (ru)": 81.81 + }, + { + "Model": "USER-base", + "RUParaPhraserSTS": 73.56, + "RuSTSBenchmarkSTS": 82.26, + "STS22 (ru)": 63.39, + "STSBenchmarkMultilingualSTS (ru)": 81.81 + } + ] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ + "accuracy": [ { - "Model": "flan-t5-large", - "Core17InstructionRetrieval": 1.32, - "News21InstructionRetrieval": 8.95, - "Robust04InstructionRetrieval": 3.9 + "Model": "USER-base", + "CEDRClassification": 46.47, + "SensitiveTopicsClassification": 27.5 } ] + }, + "InstructionRetrieval": { + "p-MRR": [] } }, - "voyage-large-2-instruct": { + "USER-bge-m3": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "USER-bge-m3", + "Tatoeba (rus-eng)": 93.52 + } + ] }, "Classification": { "accuracy": [ { - "Model": "voyage-large-2-instruct", - "AmazonCounterfactualClassification (en)": 77.6, - "AmazonPolarityClassification": 96.58, - "AmazonReviewsClassification (en)": 50.77, - "Banking77Classification": 86.96, - "EmotionClassification": 59.81, - "ImdbClassification": 96.13, - "MTOPDomainClassification (en)": 98.86, - "MTOPIntentClassification (en)": 86.97, - "MassiveIntentClassification (en)": 81.08, - "MassiveScenarioClassification (en)": 87.95, - "ToxicConversationsClassification": 83.58, - "TweetSentimentExtractionClassification": 71.55 + "Model": "USER-bge-m3", + "GeoreviewClassification": 50.98, + "HeadlineClassification": 70.09, + "InappropriatenessClassification": 60.76, + "KinopoiskClassification": 63.33, + "MassiveIntentClassification (ru)": 68.85, + "MassiveScenarioClassification (ru)": 72.9, + "RuReviewsClassification": 68.52, + "RuSciBenchGRNTIClassification": 57.67, + "RuSciBenchOECDClassification": 44.2 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-large-2-instruct", - "ArxivClusteringP2P": 51.81, - "ArxivClusteringS2S": 44.73, - "BiorxivClusteringP2P": 46.07, - "BiorxivClusteringS2S": 40.64, - "MedrxivClusteringP2P": 42.94, - "MedrxivClusteringS2S": 41.44, - "RedditClustering": 68.5, - "RedditClusteringP2P": 64.86, - "StackExchangeClustering": 74.16, - "StackExchangeClusteringP2P": 45.1, - "TwentyNewsgroupsClustering": 66.62 + "Model": "USER-bge-m3", + "GeoreviewClusteringP2P": 62.79, + "RuSciBenchGRNTIClusteringP2P": 53.11, + "RuSciBenchOECDClusteringP2P": 44.93 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-large-2-instruct", - "SprintDuplicateQuestions": 94.5, - "TwitterSemEval2015": 86.32, - "TwitterURLCorpus": 86.9 + "Model": "USER-bge-m3", + "OpusparcusPC (ru)": 90.73, + "TERRa": 64.99 }, { - "Model": "voyage-large-2-instruct", - "SprintDuplicateQuestions": 94.53, - "TwitterSemEval2015": 86.32, - "TwitterURLCorpus": 86.9 + "Model": "USER-bge-m3", + "OpusparcusPC (ru)": 90.73, + "TERRa": 65.07 } ] }, "Reranking": { "map": [ { - "Model": "voyage-large-2-instruct", - "AskUbuntuDupQuestions": 64.92, - "MindSmallReranking": 30.97, - "SciDocsRR": 89.34, - "StackOverflowDupQuestions": 55.11 + "Model": "USER-bge-m3", + "MIRACLReranking (ru)": 64.35 + }, + { + "Model": "USER-bge-m3", + "RuBQReranking": 73.08 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-large-2-instruct", - "ArguAna": 64.06, - "BrightRetrieval (theoremqa_questions)": 26.06, - "BrightRetrieval (earth_science)": 25.09, - "BrightRetrieval (leetcode)": 30.6, - "BrightRetrieval (economics)": 19.85, - "BrightRetrieval (robotics)": 11.21, - "BrightRetrieval (psychology)": 24.79, - "BrightRetrieval (aops)": 7.45, - "BrightRetrieval (sustainable_living)": 15.58, - "BrightRetrieval (pony)": 1.48, - "BrightRetrieval (theoremqa_theorems)": 11.1, - "BrightRetrieval (biology)": 23.55, - "BrightRetrieval (stackoverflow)": 15.03, - "CQADupstackRetrieval": 46.6, - "ClimateFEVER": 32.65, - "DBPedia": 46.03, - "FEVER": 91.47, - "FiQA2018": 59.76, - "HotpotQA": 70.86, - "MSMARCO": 40.6, - "NFCorpus": 40.32, - "NQ": 65.92, - "QuoraRetrieval": 87.4, - "SCIDOCS": 24.32, - "SciFact": 79.99, - "TRECCOVID": 85.07, - "Touche2020": 39.16 - } - ], - "recall_at_1": [ - { - "Model": "voyage-large-2-instruct", - "BrightRetrieval (psychology)": 41.58, - "BrightRetrieval (robotics)": 12.87, - "BrightRetrieval (biology)": 34.38, - "BrightRetrieval (stackoverflow)": 13.68, - "BrightRetrieval (pony)": 1.28, - "BrightRetrieval (economics)": 26.7, - "BrightRetrieval (sustainable_living)": 31.1, - "BrightRetrieval (earth_science)": 35.35 + "Model": "USER-bge-m3", + "MIRACLRetrieval (ru)": 67.33, + "RiaNewsRetrieval": 83.53, + "RuBQRetrieval": 70.03 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-large-2-instruct", - "BIOSSES": 89.24, - "SICK-R": 83.16, - "STS12": 73.34, - "STS13": 88.49, - "STS14": 86.49, - "STS15": 91.13, - "STS16": 85.68, - "STS17 (en-en)": 90.06, - "STS22 (en)": 66.32, - "STSBenchmark": 89.22 + "Model": "USER-bge-m3", + "RUParaPhraserSTS": 76.36, + "RuSTSBenchmarkSTS": 83.35, + "STS22 (ru)": 66.42, + "STSBenchmarkMultilingualSTS (ru)": 82.96 } ] }, "Summarization": { - "cosine_spearman": [ + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [ { - "Model": "voyage-large-2-instruct", - "SummEval": 30.84 + "Model": "USER-bge-m3", + "CEDRClassification": 45.48, + "SensitiveTopicsClassification": 26.29 } ] }, - "MultilabelClassification": { - "accuracy": [] - }, "InstructionRetrieval": { "p-MRR": [] } }, - "xlm-roberta-base": { + "all-MiniLM-L12-v2": { "BitextMining": { "f1": [ { - "Model": "xlm-roberta-base", - "BornholmBitextMining": 4.42 + "Model": "all-MiniLM-L12-v2", + "BornholmBitextMining": 35.25, + "Tatoeba (tat-eng)": 0.75, + "Tatoeba (yid-eng)": 0.19, + "Tatoeba (tzl-eng)": 6.87, + "Tatoeba (ben-eng)": 0.02, + "Tatoeba (sqi-eng)": 5.86, + "Tatoeba (war-eng)": 6.18, + "Tatoeba (nld-eng)": 12.56, + "Tatoeba (ast-eng)": 9.99, + "Tatoeba (awa-eng)": 0.44, + "Tatoeba (jpn-eng)": 2.18, + "Tatoeba (kat-eng)": 0.45, + "Tatoeba (pes-eng)": 0.3, + "Tatoeba (fra-eng)": 17.53, + "Tatoeba (nds-eng)": 11.35, + "Tatoeba (gle-eng)": 3.08, + "Tatoeba (arz-eng)": 0.0, + "Tatoeba (srp-eng)": 2.22, + "Tatoeba (mhr-eng)": 0.0, + "Tatoeba (bos-eng)": 7.05, + "Tatoeba (heb-eng)": 0.3, + "Tatoeba (orv-eng)": 0.15, + "Tatoeba (kaz-eng)": 0.82, + "Tatoeba (eus-eng)": 6.58, + "Tatoeba (hsb-eng)": 2.89, + "Tatoeba (max-eng)": 8.4, + "Tatoeba (kab-eng)": 0.91, + "Tatoeba (hrv-eng)": 5.68, + "Tatoeba (deu-eng)": 13.89, + "Tatoeba (kor-eng)": 0.9, + "Tatoeba (slk-eng)": 4.2, + "Tatoeba (tur-eng)": 3.69, + "Tatoeba (ron-eng)": 8.77, + "Tatoeba (nno-eng)": 7.45, + "Tatoeba (ido-eng)": 11.08, + "Tatoeba (est-eng)": 2.6, + "Tatoeba (ceb-eng)": 3.95, + "Tatoeba (bre-eng)": 3.68, + "Tatoeba (lfn-eng)": 7.52, + "Tatoeba (ina-eng)": 25.36, + "Tatoeba (nov-eng)": 19.45, + "Tatoeba (slv-eng)": 4.52, + "Tatoeba (fry-eng)": 14.53, + "Tatoeba (cbk-eng)": 9.76, + "Tatoeba (lvs-eng)": 3.45, + "Tatoeba (yue-eng)": 1.89, + "Tatoeba (ces-eng)": 4.2, + "Tatoeba (afr-eng)": 7.59, + "Tatoeba (rus-eng)": 0.07, + "Tatoeba (amh-eng)": 0.01, + "Tatoeba (ang-eng)": 14.63, + "Tatoeba (cat-eng)": 11.79, + "Tatoeba (khm-eng)": 0.42, + "Tatoeba (pam-eng)": 4.73, + "Tatoeba (pms-eng)": 8.94, + "Tatoeba (gsw-eng)": 9.9, + "Tatoeba (swg-eng)": 11.9, + "Tatoeba (tel-eng)": 0.67, + "Tatoeba (nob-eng)": 8.02, + "Tatoeba (uig-eng)": 0.4, + "Tatoeba (bel-eng)": 0.85, + "Tatoeba (lit-eng)": 1.56, + "Tatoeba (isl-eng)": 3.44, + "Tatoeba (swh-eng)": 5.82, + "Tatoeba (tha-eng)": 0.67, + "Tatoeba (mon-eng)": 0.06, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (swe-eng)": 7.31, + "Tatoeba (epo-eng)": 8.5, + "Tatoeba (ind-eng)": 5.3, + "Tatoeba (tgl-eng)": 3.34, + "Tatoeba (arq-eng)": 0.28, + "Tatoeba (aze-eng)": 1.47, + "Tatoeba (por-eng)": 11.36, + "Tatoeba (hun-eng)": 3.93, + "Tatoeba (kur-eng)": 7.3, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (csb-eng)": 5.21, + "Tatoeba (glg-eng)": 12.6, + "Tatoeba (wuu-eng)": 1.89, + "Tatoeba (uzb-eng)": 2.2, + "Tatoeba (fao-eng)": 5.92, + "Tatoeba (mal-eng)": 0.24, + "Tatoeba (dsb-eng)": 3.06, + "Tatoeba (jav-eng)": 3.5, + "Tatoeba (xho-eng)": 3.66, + "Tatoeba (dan-eng)": 10.21, + "Tatoeba (gla-eng)": 2.58, + "Tatoeba (spa-eng)": 11.26, + "Tatoeba (ber-eng)": 4.72, + "Tatoeba (ukr-eng)": 0.57, + "Tatoeba (cym-eng)": 5.13, + "Tatoeba (cha-eng)": 13.07, + "Tatoeba (pol-eng)": 4.29, + "Tatoeba (fin-eng)": 3.65, + "Tatoeba (bul-eng)": 0.23, + "Tatoeba (tuk-eng)": 2.66, + "Tatoeba (tam-eng)": 0.33, + "Tatoeba (mar-eng)": 0.04, + "Tatoeba (vie-eng)": 5.06, + "Tatoeba (ell-eng)": 0.2, + "Tatoeba (lat-eng)": 7.14, + "Tatoeba (zsm-eng)": 5.99, + "Tatoeba (ita-eng)": 12.57, + "Tatoeba (ara-eng)": 0.43, + "Tatoeba (cor-eng)": 2.47, + "Tatoeba (oci-eng)": 8.72, + "Tatoeba (mkd-eng)": 0.01, + "Tatoeba (ile-eng)": 17.43, + "Tatoeba (kzj-eng)": 3.64, + "Tatoeba (cmn-eng)": 2.45, + "Tatoeba (dtp-eng)": 3.31, + "Tatoeba (hye-eng)": 0.5 } ] }, "Classification": { "accuracy": [ { - "Model": "xlm-roberta-base", - "AmazonReviewsClassification (fr)": 26.75, - "AngryTweetsClassification": 52.41, - "DKHateClassification": 56.78, - "DanishPoliticalCommentsClassification": 34.03, - "LccSentimentClassification": 52.27, - "MTOPDomainClassification (fr)": 43.83, - "MTOPIntentClassification (fr)": 19.38, - "MasakhaNEWSClassification (fra)": 60.5, - "MassiveIntentClassification (da)": 41.06, - "MassiveIntentClassification (nb)": 40.46, - "MassiveIntentClassification (sv)": 45.12, - "MassiveIntentClassification (fr)": 13.58, - "MassiveScenarioClassification (da)": 43.91, - "MassiveScenarioClassification (nb)": 44.83, - "MassiveScenarioClassification (sv)": 47.35, - "MassiveScenarioClassification (fr)": 23.21, - "NoRecClassification": 46.28, - "NordicLangClassification": 79.39, - "NorwegianParliament": 56.75, - "ScalaDaClassification": 57.3, - "ScalaNbClassification": 58.33 + "Model": "all-MiniLM-L12-v2", + "AllegroReviews": 23.85, + "AmazonCounterfactualClassification (en-ext)": 67.24, + "AmazonCounterfactualClassification (en)": 65.28, + "AmazonCounterfactualClassification (de)": 57.13, + "AmazonCounterfactualClassification (ja)": 59.94, + "AmazonCounterfactualClassification": 59.91, + "AmazonPolarityClassification": 62.98, + "AmazonReviewsClassification (en)": 30.79, + "AmazonReviewsClassification (de)": 25.92, + "AmazonReviewsClassification (es)": 27.64, + "AmazonReviewsClassification (fr)": 27.53, + "AmazonReviewsClassification (ja)": 23.57, + "AmazonReviewsClassification (zh)": 22.99, + "AmazonReviewsClassification": 22.99, + "AngryTweetsClassification": 42.87, + "Banking77Classification": 80.4, + "CBD": 48.46, + "DanishPoliticalCommentsClassification": 27.07, + "EmotionClassification": 41.17, + "GeoreviewClassification": 23.49, + "HeadlineClassification": 28.49, + "IFlyTek": 15.31, + "ImdbClassification": 59.76, + "InappropriatenessClassification": 50.85, + "JDReview": 59.57, + "KinopoiskClassification": 34.17, + "LccSentimentClassification": 41.93, + "MTOPDomainClassification (en)": 91.88, + "MTOPDomainClassification (de)": 72.04, + "MTOPDomainClassification (es)": 72.99, + "MTOPDomainClassification (fr)": 75.57, + "MTOPDomainClassification (hi)": 40.4, + "MTOPDomainClassification (th)": 17.1, + "MTOPDomainClassification": 17.1, + "MTOPIntentClassification (en)": 62.83, + "MTOPIntentClassification (de)": 43.42, + "MTOPIntentClassification (es)": 41.91, + "MTOPIntentClassification (fr)": 38.96, + "MTOPIntentClassification (hi)": 17.76, + "MTOPIntentClassification (th)": 5.9, + "MTOPIntentClassification": 5.63, + "MasakhaNEWSClassification (amh)": 30.64, + "MasakhaNEWSClassification (eng)": 76.62, + "MasakhaNEWSClassification (fra)": 67.18, + "MasakhaNEWSClassification (hau)": 52.59, + "MasakhaNEWSClassification (ibo)": 54.26, + "MasakhaNEWSClassification (lin)": 62.23, + "MasakhaNEWSClassification (lug)": 47.62, + "MasakhaNEWSClassification (orm)": 47.17, + "MasakhaNEWSClassification (pcm)": 91.77, + "MasakhaNEWSClassification (run)": 54.47, + "MasakhaNEWSClassification (sna)": 66.53, + "MasakhaNEWSClassification (som)": 40.27, + "MasakhaNEWSClassification (swa)": 47.77, + "MasakhaNEWSClassification (tir)": 21.18, + "MasakhaNEWSClassification (xho)": 54.34, + "MasakhaNEWSClassification (yor)": 58.61, + "MasakhaNEWSClassification": 72.2, + "MassiveIntentClassification (ja)": 30.89, + "MassiveIntentClassification (zh-CN)": 23.74, + "MassiveIntentClassification (en)": 67.11, + "MassiveIntentClassification (nb)": 41.79, + "MassiveIntentClassification (ur)": 16.26, + "MassiveIntentClassification (kn)": 3.07, + "MassiveIntentClassification (ru)": 26.29, + "MassiveIntentClassification (de)": 44.12, + "MassiveIntentClassification (az)": 34.3, + "MassiveIntentClassification (hu)": 37.95, + "MassiveIntentClassification (ml)": 2.84, + "MassiveIntentClassification (tr)": 35.93, + "MassiveIntentClassification (ko)": 19.97, + "MassiveIntentClassification (mn)": 23.27, + "MassiveIntentClassification (ta)": 13.12, + "MassiveIntentClassification (fr)": 44.75, + "MassiveIntentClassification (el)": 28.68, + "MassiveIntentClassification (sw)": 35.26, + "MassiveIntentClassification (sv)": 40.33, + "MassiveIntentClassification (vi)": 37.35, + "MassiveIntentClassification (ms)": 36.16, + "MassiveIntentClassification (hi)": 18.0, + "MassiveIntentClassification (hy)": 8.69, + "MassiveIntentClassification (pl)": 37.59, + "MassiveIntentClassification (pt)": 45.08, + "MassiveIntentClassification (fi)": 39.19, + "MassiveIntentClassification (ar)": 21.02, + "MassiveIntentClassification (da)": 44.35, + "MassiveIntentClassification (af)": 38.84, + "MassiveIntentClassification (sq)": 41.47, + "MassiveIntentClassification (am)": 2.45, + "MassiveIntentClassification (nl)": 41.77, + "MassiveIntentClassification (es)": 40.82, + "MassiveIntentClassification (it)": 43.16, + "MassiveIntentClassification (km)": 4.98, + "MassiveIntentClassification (id)": 39.65, + "MassiveIntentClassification (bn)": 13.7, + "MassiveIntentClassification (is)": 35.17, + "MassiveIntentClassification (jv)": 36.67, + "MassiveIntentClassification (ka)": 9.17, + "MassiveIntentClassification (ro)": 41.64, + "MassiveIntentClassification (sl)": 38.48, + "MassiveIntentClassification (zh-TW)": 22.38, + "MassiveIntentClassification (tl)": 38.63, + "MassiveIntentClassification (fa)": 23.56, + "MassiveIntentClassification (te)": 2.54, + "MassiveIntentClassification (lv)": 38.54, + "MassiveIntentClassification (my)": 4.36, + "MassiveIntentClassification (th)": 10.46, + "MassiveIntentClassification (he)": 23.71, + "MassiveIntentClassification (cy)": 35.65, + "MassiveIntentClassification": 22.39, + "MassiveScenarioClassification (pt)": 53.0, + "MassiveScenarioClassification (zh-TW)": 31.14, + "MassiveScenarioClassification (sv)": 46.81, + "MassiveScenarioClassification (fa)": 29.0, + "MassiveScenarioClassification (lv)": 42.75, + "MassiveScenarioClassification (fr)": 53.77, + "MassiveScenarioClassification (ml)": 7.25, + "MassiveScenarioClassification (is)": 43.11, + "MassiveScenarioClassification (sq)": 49.12, + "MassiveScenarioClassification (am)": 7.41, + "MassiveScenarioClassification (cy)": 41.43, + "MassiveScenarioClassification (ro)": 49.94, + "MassiveScenarioClassification (th)": 18.32, + "MassiveScenarioClassification (kn)": 8.32, + "MassiveScenarioClassification (ur)": 24.45, + "MassiveScenarioClassification (id)": 44.37, + "MassiveScenarioClassification (el)": 35.55, + "MassiveScenarioClassification (he)": 25.73, + "MassiveScenarioClassification (my)": 10.06, + "MassiveScenarioClassification (de)": 52.08, + "MassiveScenarioClassification (hi)": 23.03, + "MassiveScenarioClassification (hu)": 44.1, + "MassiveScenarioClassification (tl)": 48.29, + "MassiveScenarioClassification (en)": 74.57, + "MassiveScenarioClassification (vi)": 40.97, + "MassiveScenarioClassification (bn)": 18.98, + "MassiveScenarioClassification (ka)": 14.85, + "MassiveScenarioClassification (hy)": 14.87, + "MassiveScenarioClassification (pl)": 44.74, + "MassiveScenarioClassification (sw)": 43.18, + "MassiveScenarioClassification (it)": 51.7, + "MassiveScenarioClassification (tr)": 41.8, + "MassiveScenarioClassification (nl)": 49.14, + "MassiveScenarioClassification (az)": 39.62, + "MassiveScenarioClassification (da)": 49.5, + "MassiveScenarioClassification (es)": 50.73, + "MassiveScenarioClassification (ar)": 27.66, + "MassiveScenarioClassification (sl)": 42.24, + "MassiveScenarioClassification (jv)": 44.54, + "MassiveScenarioClassification (ms)": 44.67, + "MassiveScenarioClassification (fi)": 45.8, + "MassiveScenarioClassification (km)": 9.75, + "MassiveScenarioClassification (ru)": 28.77, + "MassiveScenarioClassification (mn)": 29.01, + "MassiveScenarioClassification (ta)": 19.4, + "MassiveScenarioClassification (te)": 7.74, + "MassiveScenarioClassification (ja)": 36.77, + "MassiveScenarioClassification (ko)": 25.72, + "MassiveScenarioClassification (zh-CN)": 33.19, + "MassiveScenarioClassification (nb)": 47.35, + "MassiveScenarioClassification (af)": 45.72, + "MassiveScenarioClassification": 31.16, + "MultilingualSentiment": 40.52, + "NoRecClassification": 37.73, + "NordicLangClassification": 54.17, + "OnlineShopping": 58.65, + "PAC": 59.53, + "PolEmo2.0-IN": 38.32, + "PolEmo2.0-OUT": 22.98, + "RuReviewsClassification": 42.49, + "RuSciBenchGRNTIClassification": 10.49, + "RuSciBenchOECDClassification": 8.31, + "TNews": 20.37, + "ToxicConversationsClassification": 67.47, + "TweetSentimentExtractionClassification": 54.25, + "Waimai": 63.48 } ] }, "Clustering": { "v_measure": [ { - "Model": "xlm-roberta-base", - "AlloProfClusteringP2P": 52.24, - "AlloProfClusteringS2S": 20.37, - "HALClusteringS2S": 8.68, - "MLSUMClusteringP2P": 40.44, - "MLSUMClusteringS2S": 24.14, - "MasakhaNEWSClusteringP2P (fra)": 29.29, - "MasakhaNEWSClusteringS2S (fra)": 23.76 + "Model": "all-MiniLM-L12-v2", + "AlloProfClusteringP2P": 46.03, + "AlloProfClusteringS2S": 31.83, + "ArxivClusteringP2P": 46.07, + "ArxivClusteringS2S": 37.5, + "BiorxivClusteringP2P": 36.99, + "BiorxivClusteringS2S": 33.21, + "GeoreviewClusteringP2P": 20.76, + "HALClusteringS2S": 19.58, + "MLSUMClusteringP2P": 34.35, + "MLSUMClusteringS2S": 29.3, + "MasakhaNEWSClusteringP2P (amh)": 40.5, + "MasakhaNEWSClusteringP2P (eng)": 55.86, + "MasakhaNEWSClusteringP2P (fra)": 42.72, + "MasakhaNEWSClusteringP2P (hau)": 26.61, + "MasakhaNEWSClusteringP2P (ibo)": 44.26, + "MasakhaNEWSClusteringP2P (lin)": 54.52, + "MasakhaNEWSClusteringP2P (lug)": 43.87, + "MasakhaNEWSClusteringP2P (orm)": 24.87, + "MasakhaNEWSClusteringP2P (pcm)": 74.42, + "MasakhaNEWSClusteringP2P (run)": 51.73, + "MasakhaNEWSClusteringP2P (sna)": 46.89, + "MasakhaNEWSClusteringP2P (som)": 31.17, + "MasakhaNEWSClusteringP2P (swa)": 23.72, + "MasakhaNEWSClusteringP2P (tir)": 44.08, + "MasakhaNEWSClusteringP2P (xho)": 26.97, + "MasakhaNEWSClusteringP2P (yor)": 32.51, + "MasakhaNEWSClusteringP2P": 42.72, + "MasakhaNEWSClusteringS2S (amh)": 44.11, + "MasakhaNEWSClusteringS2S (eng)": 40.71, + "MasakhaNEWSClusteringS2S (fra)": 32.47, + "MasakhaNEWSClusteringS2S (hau)": 20.63, + "MasakhaNEWSClusteringS2S (ibo)": 35.33, + "MasakhaNEWSClusteringS2S (lin)": 54.52, + "MasakhaNEWSClusteringS2S (lug)": 51.42, + "MasakhaNEWSClusteringS2S (orm)": 24.84, + "MasakhaNEWSClusteringS2S (pcm)": 70.72, + "MasakhaNEWSClusteringS2S (run)": 50.88, + "MasakhaNEWSClusteringS2S (sna)": 46.6, + "MasakhaNEWSClusteringS2S (som)": 29.87, + "MasakhaNEWSClusteringS2S (swa)": 10.82, + "MasakhaNEWSClusteringS2S (tir)": 43.63, + "MasakhaNEWSClusteringS2S (xho)": 24.55, + "MasakhaNEWSClusteringS2S (yor)": 32.85, + "MasakhaNEWSClusteringS2S": 32.47, + "MedrxivClusteringP2P": 34.25, + "MedrxivClusteringS2S": 32.24, + "RedditClustering": 51.18, + "RedditClusteringP2P": 54.8, + "RuSciBenchGRNTIClusteringP2P": 10.65, + "RuSciBenchOECDClusteringP2P": 10.19, + "StackExchangeClustering": 53.05, + "StackExchangeClusteringP2P": 33.13, + "TwentyNewsgroupsClustering": 47.47 } ] }, "PairClassification": { "max_ap": [ { - "Model": "xlm-roberta-base", - "OpusparcusPC (fr)": 85.45, - "PawsXPairClassification (fr)": 51.35 + "Model": "all-MiniLM-L12-v2", + "CDSC-E": 49.04, + "OpusparcusPC (de)": 91.2, + "OpusparcusPC (en)": 97.41, + "OpusparcusPC (fi)": 85.99, + "OpusparcusPC (fr)": 87.35, + "OpusparcusPC (ru)": 79.23, + "OpusparcusPC (sv)": 84.87, + "PSC": 87.92, + "PawsXPairClassification (de)": 50.83, + "PawsXPairClassification (en)": 58.62, + "PawsXPairClassification (es)": 52.08, + "PawsXPairClassification (fr)": 55.54, + "PawsXPairClassification (ja)": 47.75, + "PawsXPairClassification (ko)": 49.59, + "PawsXPairClassification (zh)": 52.8, + "SICK-E-PL": 49.63, + "SprintDuplicateQuestions": 92.45, + "TERRa": 46.4, + "TwitterSemEval2015": 70.02, + "TwitterURLCorpus": 84.77 }, { - "Model": "xlm-roberta-base", - "OpusparcusPC (fr)": 85.91, - "PawsXPairClassification (fr)": 51.73 + "Model": "all-MiniLM-L12-v2", + "CDSC-E": 49.04, + "OpusparcusPC (de)": 91.2, + "OpusparcusPC (en)": 97.41, + "OpusparcusPC (fi)": 85.99, + "OpusparcusPC (fr)": 87.35, + "OpusparcusPC (ru)": 79.23, + "OpusparcusPC (sv)": 84.87, + "PSC": 87.92, + "PawsXPairClassification (de)": 51.07, + "PawsXPairClassification (en)": 58.7, + "PawsXPairClassification (es)": 52.08, + "PawsXPairClassification (fr)": 55.59, + "PawsXPairClassification (ja)": 47.88, + "PawsXPairClassification (ko)": 49.65, + "PawsXPairClassification (zh)": 53.01, + "SICK-E-PL": 49.63, + "SprintDuplicateQuestions": 92.58, + "TERRa": 46.4, + "TwitterSemEval2015": 70.02, + "TwitterURLCorpus": 84.77 } ] }, "Reranking": { "map": [ { - "Model": "xlm-roberta-base", - "AlloprofReranking": 25.58, - "SyntecReranking": 43.75 + "Model": "all-MiniLM-L12-v2", + "AlloprofReranking": 67.01, + "AskUbuntuDupQuestions": 64.06, + "MMarcoReranking": 5.27, + "MindSmallReranking": 31.02, + "RuBQReranking": 38.51, + "SciDocsRR": 87.2, + "StackOverflowDupQuestions": 51.47, + "SyntecReranking": 69.17, + "T2Reranking": 60.32 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "xlm-roberta-base", - "AlloprofRetrieval": 0.16, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 0.88, - "SyntecRetrieval": 3.33, - "XPQARetrieval (fr)": 11.65 + "Model": "all-MiniLM-L12-v2", + "AILACasedocs": 16.8, + "AILAStatutes": 20.71, + "ARCChallenge": 10.23, + "AlloprofRetrieval": 33.2, + "AlphaNLI": 25.35, + "AppsRetrieval": 5.97, + "ArguAna": 47.13, + "ArguAna-PL": 13.4, + "BSARDRetrieval": 6.24, + "CQADupstackRetrieval": 42.53, + "ClimateFEVER": 21.57, + "CmedqaRetrieval": 2.58, + "CodeFeedbackMT": 27.07, + "CodeFeedbackST": 61.01, + "CodeSearchNetCCRetrieval (python)": 71.1, + "CodeSearchNetCCRetrieval (javascript)": 70.13, + "CodeSearchNetCCRetrieval (go)": 63.52, + "CodeSearchNetCCRetrieval (ruby)": 72.28, + "CodeSearchNetCCRetrieval (java)": 67.51, + "CodeSearchNetCCRetrieval (php)": 60.81, + "CodeSearchNetRetrieval (python)": 82.09, + "CodeSearchNetRetrieval (javascript)": 67.52, + "CodeSearchNetRetrieval (go)": 89.06, + "CodeSearchNetRetrieval (ruby)": 74.77, + "CodeSearchNetRetrieval (java)": 53.2, + "CodeSearchNetRetrieval (php)": 77.36, + "CodeTransOceanContest": 59.86, + "CodeTransOceanDL": 19.94, + "CosQA": 32.09, + "CovidRetrieval": 10.79, + "DBPedia": 33.35, + "DuRetrieval": 6.62, + "EcomRetrieval": 4.01, + "FEVER": 55.9, + "FiQA-PL": 5.82, + "FiQA2018": 37.27, + "GerDaLIRSmall": 1.35, + "HellaSwag": 24.08, + "HotpotQA": 44.59, + "LEMBNarrativeQARetrieval": 19.64, + "LEMBQMSumRetrieval": 13.08, + "LEMBSummScreenFDRetrieval": 46.98, + "LEMBWikimQARetrieval": 44.88, + "LeCaRDv2": 18.77, + "LegalBenchConsumerContractsQA": 60.21, + "LegalBenchCorporateLobbying": 88.69, + "LegalQuAD": 7.44, + "LegalSummarization": 57.43, + "MMarcoRetrieval": 7.46, + "MSMARCO": 39.03, + "MedicalRetrieval": 2.3, + "MintakaRetrieval (ar)": 2.74, + "MintakaRetrieval (de)": 20.04, + "MintakaRetrieval (es)": 11.76, + "MintakaRetrieval (fr)": 16.08, + "MintakaRetrieval (hi)": 3.04, + "MintakaRetrieval (it)": 11.83, + "MintakaRetrieval (ja)": 7.31, + "MintakaRetrieval (pt)": 13.66, + "NFCorpus": 32.25, + "NFCorpus-PL": 15.43, + "NQ": 46.47, + "PIQA": 26.44, + "Quail": 3.08, + "QuoraRetrieval": 87.75, + "RARbCode": 42.44, + "RARbMath": 66.36, + "RuBQRetrieval": 8.84, + "SCIDOCS": 21.82, + "SCIDOCS-PL": 5.34, + "SIQA": 2.09, + "SciFact": 62.64, + "SciFact-PL": 22.48, + "SpartQA": 2.67, + "StackOverflowQA": 80.63, + "SyntecRetrieval": 60.8, + "SyntheticText2SQL": 43.93, + "T2Retrieval": 4.82, + "TRECCOVID": 50.82, + "TRECCOVID-PL": 16.52, + "TempReasonL1": 1.66, + "TempReasonL2Fact": 10.31, + "TempReasonL2Pure": 0.63, + "TempReasonL3Fact": 11.11, + "TempReasonL3Pure": 6.63, + "Touche2020": 17.22, + "VideoRetrieval": 9.38, + "WinoGrande": 27.2, + "XPQARetrieval (ara-ara)": 7.83, + "XPQARetrieval (eng-ara)": 2.55, + "XPQARetrieval (ara-eng)": 8.88, + "XPQARetrieval (deu-deu)": 56.77, + "XPQARetrieval (eng-deu)": 18.2, + "XPQARetrieval (deu-eng)": 30.06, + "XPQARetrieval (spa-spa)": 42.22, + "XPQARetrieval (eng-spa)": 7.53, + "XPQARetrieval (spa-eng)": 26.27, + "XPQARetrieval (fra-fra)": 55.9, + "XPQARetrieval (eng-fra)": 14.89, + "XPQARetrieval (fra-eng)": 34.15, + "XPQARetrieval (hin-hin)": 33.26, + "XPQARetrieval (eng-hin)": 6.44, + "XPQARetrieval (hin-eng)": 6.98, + "XPQARetrieval (ita-ita)": 58.68, + "XPQARetrieval (eng-ita)": 8.56, + "XPQARetrieval (ita-eng)": 28.71, + "XPQARetrieval (jpn-jpn)": 39.53, + "XPQARetrieval (eng-jpn)": 5.7, + "XPQARetrieval (jpn-eng)": 13.75, + "XPQARetrieval (kor-kor)": 13.44, + "XPQARetrieval (eng-kor)": 7.43, + "XPQARetrieval (kor-eng)": 7.4, + "XPQARetrieval (pol-pol)": 28.07, + "XPQARetrieval (eng-pol)": 10.03, + "XPQARetrieval (pol-eng)": 16.58, + "XPQARetrieval (por-por)": 34.09, + "XPQARetrieval (eng-por)": 7.38, + "XPQARetrieval (por-eng)": 22.59, + "XPQARetrieval (tam-tam)": 9.07, + "XPQARetrieval (eng-tam)": 4.15, + "XPQARetrieval (tam-eng)": 3.76, + "XPQARetrieval (cmn-cmn)": 21.07, + "XPQARetrieval (eng-cmn)": 6.58, + "XPQARetrieval (cmn-eng)": 9.39, + "XPQARetrieval": 55.9 } ] }, "STS": { "cosine_spearman": [ { - "Model": "xlm-roberta-base", - "SICKFr": 48.62, - "STS22 (fr)": 56.72, - "STSBenchmarkMultilingualSTS (fr)": 46.23 + "Model": "all-MiniLM-L12-v2", + "AFQMC": 7.94, + "ATEC": 12.97, + "BIOSSES": 83.57, + "BQ": 23.31, + "CDSC-R": 82.5, + "LCQMC": 21.04, + "PAWSX": 7.31, + "RUParaPhraserSTS": 45.47, + "RuSTSBenchmarkSTS": 56.33, + "SICK-R": 79.32, + "SICK-R-PL": 54.26, + "SICKFr": 63.16, + "STS12": 73.08, + "STS13": 82.13, + "STS14": 76.73, + "STS15": 85.58, + "STS16": 80.23, + "STS17 (ar-ar)": 58.71, + "STS17 (es-es)": 78.37, + "STS17 (en-tr)": 0.43, + "STS17 (es-en)": 22.01, + "STS17 (it-en)": 24.28, + "STS17 (en-de)": 27.54, + "STS17 (en-en)": 88.63, + "STS17 (ko-ko)": 43.37, + "STS17 (nl-en)": 24.51, + "STS17 (en-ar)": 0.54, + "STS17 (fr-en)": 30.7, + "STS22 (de-fr)": 43.52, + "STS22 (tr)": 21.6, + "STS22 (en)": 66.0, + "STS22 (ar)": 17.54, + "STS22 (pl-en)": 42.67, + "STS22 (es-it)": 40.71, + "STS22 (pl)": 19.22, + "STS22 (fr-pl)": 16.9, + "STS22 (es)": 43.98, + "STS22 (ru)": 11.19, + "STS22 (it)": 47.48, + "STS22 (de-en)": 42.86, + "STS22 (de)": 22.53, + "STS22 (zh-en)": 44.39, + "STS22 (de-pl)": 1.63, + "STS22 (fr)": 69.51, + "STS22 (zh)": 33.15, + "STS22 (es-en)": 53.99, + "STSB": 36.66, + "STSBenchmark": 83.09, + "STSBenchmarkMultilingualSTS (es)": 65.33, + "STSBenchmarkMultilingualSTS (zh)": 38.93, + "STSBenchmarkMultilingualSTS (en)": 83.09, + "STSBenchmarkMultilingualSTS (fr)": 66.68, + "STSBenchmarkMultilingualSTS (it)": 60.71, + "STSBenchmarkMultilingualSTS (nl)": 60.03, + "STSBenchmarkMultilingualSTS (pt)": 63.85, + "STSBenchmarkMultilingualSTS (ru)": 56.09, + "STSBenchmarkMultilingualSTS (de)": 63.28, + "STSBenchmarkMultilingualSTS (pl)": 60.2 + }, + { + "Model": "all-MiniLM-L12-v2", + "STS17": 24.51, + "STS22": 44.39 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "xlm-roberta-base", - "SummEvalFr": 29.14 + "Model": "all-MiniLM-L12-v2", + "SummEval": 27.9, + "SummEvalFr": 26.63 + }, + { + "Model": "all-MiniLM-L12-v2", + "SummEval": 27.9, + "SummEvalFr": 26.63 } ] }, "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "bge-base-en-v1.5-instruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "accuracy": [ { - "Model": "bge-base-en-v1.5-instruct", - "ARCChallenge": 8.85, - "AlphaNLI": 4.13, - "HellaSwag": 24.03, - "PIQA": 23.03, - "Quail": 1.25, - "RARbCode": 46.32, - "RARbMath": 45.62, - "SIQA": 0.24, - "SpartQA": 2.67, - "TempReasonL1": 0.8, - "TempReasonL2Fact": 16.56, - "TempReasonL2Pure": 1.33, - "TempReasonL3Fact": 12.68, - "TempReasonL3Pure": 5.08, - "WinoGrande": 10.27 + "Model": "all-MiniLM-L12-v2", + "CEDRClassification": 33.86, + "SensitiveTopicsClassification": 18.05 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "all-MiniLM-L12-v2", + "Core17InstructionRetrieval": 1.69, + "News21InstructionRetrieval": -0.35, + "Robust04InstructionRetrieval": -3.59 + } + ] } }, - "voyage-law-2": { + "all-MiniLM-L6-v2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "all-MiniLM-L6-v2", + "BornholmBitextMining": 29.68, + "Tatoeba (gle-eng)": 2.75, + "Tatoeba (ell-eng)": 0.1, + "Tatoeba (heb-eng)": 0.22, + "Tatoeba (mon-eng)": 0.38, + "Tatoeba (lat-eng)": 5.04, + "Tatoeba (csb-eng)": 3.78, + "Tatoeba (dsb-eng)": 2.9, + "Tatoeba (tel-eng)": 0.46, + "Tatoeba (ara-eng)": 0.0, + "Tatoeba (lfn-eng)": 4.55, + "Tatoeba (bul-eng)": 0.21, + "Tatoeba (kur-eng)": 5.21, + "Tatoeba (fao-eng)": 5.33, + "Tatoeba (kat-eng)": 0.3, + "Tatoeba (hsb-eng)": 2.65, + "Tatoeba (tam-eng)": 0.33, + "Tatoeba (vie-eng)": 3.07, + "Tatoeba (pes-eng)": 0.0, + "Tatoeba (slk-eng)": 3.27, + "Tatoeba (bos-eng)": 5.58, + "Tatoeba (ukr-eng)": 0.3, + "Tatoeba (gsw-eng)": 11.33, + "Tatoeba (bre-eng)": 3.22, + "Tatoeba (uig-eng)": 0.2, + "Tatoeba (cbk-eng)": 7.04, + "Tatoeba (ile-eng)": 13.54, + "Tatoeba (lit-eng)": 0.92, + "Tatoeba (mar-eng)": 0.0, + "Tatoeba (tha-eng)": 0.3, + "Tatoeba (mhr-eng)": 0.0, + "Tatoeba (max-eng)": 6.93, + "Tatoeba (yid-eng)": 0.14, + "Tatoeba (khm-eng)": 0.42, + "Tatoeba (ina-eng)": 17.63, + "Tatoeba (ita-eng)": 9.9, + "Tatoeba (bel-eng)": 0.5, + "Tatoeba (srp-eng)": 1.28, + "Tatoeba (pol-eng)": 2.58, + "Tatoeba (slv-eng)": 3.25, + "Tatoeba (tzl-eng)": 4.58, + "Tatoeba (uzb-eng)": 2.34, + "Tatoeba (ido-eng)": 7.48, + "Tatoeba (rus-eng)": 0.07, + "Tatoeba (cha-eng)": 13.29, + "Tatoeba (wuu-eng)": 0.6, + "Tatoeba (urd-eng)": 0.1, + "Tatoeba (hye-eng)": 0.41, + "Tatoeba (ber-eng)": 4.69, + "Tatoeba (por-eng)": 8.29, + "Tatoeba (nov-eng)": 13.97, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (fra-eng)": 8.17, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (nds-eng)": 9.56, + "Tatoeba (tat-eng)": 0.44, + "Tatoeba (kab-eng)": 0.96, + "Tatoeba (jpn-eng)": 0.97, + "Tatoeba (mkd-eng)": 0.0, + "Tatoeba (yue-eng)": 0.86, + "Tatoeba (dtp-eng)": 1.88, + "Tatoeba (xho-eng)": 4.01, + "Tatoeba (ind-eng)": 3.86, + "Tatoeba (glg-eng)": 9.31, + "Tatoeba (zsm-eng)": 4.24, + "Tatoeba (swh-eng)": 5.8, + "Tatoeba (ast-eng)": 6.84, + "Tatoeba (spa-eng)": 5.63, + "Tatoeba (cat-eng)": 6.93, + "Tatoeba (pms-eng)": 7.62, + "Tatoeba (pam-eng)": 3.54, + "Tatoeba (kaz-eng)": 0.42, + "Tatoeba (hrv-eng)": 3.83, + "Tatoeba (epo-eng)": 5.46, + "Tatoeba (orv-eng)": 0.0, + "Tatoeba (swg-eng)": 8.92, + "Tatoeba (kzj-eng)": 2.78, + "Tatoeba (ang-eng)": 15.64, + "Tatoeba (cmn-eng)": 1.92, + "Tatoeba (nob-eng)": 4.34, + "Tatoeba (lvs-eng)": 2.61, + "Tatoeba (dan-eng)": 7.84, + "Tatoeba (aze-eng)": 1.04, + "Tatoeba (tur-eng)": 3.59, + "Tatoeba (sqi-eng)": 3.58, + "Tatoeba (hun-eng)": 3.56, + "Tatoeba (awa-eng)": 0.51, + "Tatoeba (afr-eng)": 5.89, + "Tatoeba (tuk-eng)": 3.52, + "Tatoeba (est-eng)": 2.36, + "Tatoeba (fry-eng)": 11.22, + "Tatoeba (cor-eng)": 2.41, + "Tatoeba (ceb-eng)": 3.39, + "Tatoeba (eus-eng)": 5.54, + "Tatoeba (amh-eng)": 0.25, + "Tatoeba (ron-eng)": 6.82, + "Tatoeba (oci-eng)": 6.55, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (arq-eng)": 0.11, + "Tatoeba (swe-eng)": 6.06, + "Tatoeba (nno-eng)": 5.38, + "Tatoeba (gla-eng)": 2.7, + "Tatoeba (kor-eng)": 0.45, + "Tatoeba (arz-eng)": 0.0, + "Tatoeba (fin-eng)": 2.79, + "Tatoeba (ces-eng)": 3.04, + "Tatoeba (deu-eng)": 7.89, + "Tatoeba (cym-eng)": 6.09, + "Tatoeba (war-eng)": 4.94, + "Tatoeba (isl-eng)": 2.37, + "Tatoeba (nld-eng)": 10.16, + "Tatoeba (tgl-eng)": 2.69, + "Tatoeba (jav-eng)": 3.37 + } + ] }, "Classification": { "accuracy": [ { - "Model": "voyage-law-2", - "AmazonReviewsClassification (fr)": 41.98, - "MTOPDomainClassification (fr)": 90.12, - "MTOPIntentClassification (fr)": 62.44, - "MasakhaNEWSClassification (fra)": 76.42, - "MassiveIntentClassification (fr)": 66.94, - "MassiveScenarioClassification (fr)": 72.78 + "Model": "all-MiniLM-L6-v2", + "AllegroReviews": 24.64, + "AmazonCounterfactualClassification": 64.15, + "AmazonCounterfactualClassification (en-ext)": 65.59, + "AmazonCounterfactualClassification (en)": 63.64, + "AmazonCounterfactualClassification (de)": 57.82, + "AmazonCounterfactualClassification (ja)": 60.9, + "AmazonPolarityClassification": 64.26, + "AmazonReviewsClassification": 31.79, + "AmazonReviewsClassification (en)": 30.85, + "AmazonReviewsClassification (de)": 26.44, + "AmazonReviewsClassification (es)": 27.35, + "AmazonReviewsClassification (fr)": 26.88, + "AmazonReviewsClassification (ja)": 23.78, + "AmazonReviewsClassification (zh)": 23.67, + "AngryTweetsClassification": 42.48, + "Banking77Classification": 80.04, + "CBD": 50.9, + "DKHateClassification": 55.05, + "DanishPoliticalCommentsClassification": 26.7, + "EmotionClassification": 40.83, + "GeoreviewClassification": 27.08, + "HeadlineClassification": 27.77, + "IFlyTek": 16.09, + "ImdbClassification": 61.76, + "InappropriatenessClassification": 51.73, + "JDReview": 59.98, + "KinopoiskClassification": 33.93, + "LccSentimentClassification": 38.53, + "MTOPDomainClassification": 91.56, + "MTOPDomainClassification (en)": 91.68, + "MTOPDomainClassification (de)": 70.47, + "MTOPDomainClassification (es)": 72.99, + "MTOPDomainClassification (fr)": 75.1, + "MTOPDomainClassification (hi)": 40.74, + "MTOPDomainClassification (th)": 15.66, + "MTOPIntentClassification": 62.18, + "MTOPIntentClassification (en)": 61.55, + "MTOPIntentClassification (de)": 45.7, + "MTOPIntentClassification (es)": 44.19, + "MTOPIntentClassification (fr)": 39.67, + "MTOPIntentClassification (hi)": 18.69, + "MTOPIntentClassification (th)": 5.03, + "MasakhaNEWSClassification": 74.05, + "MasakhaNEWSClassification (amh)": 33.03, + "MasakhaNEWSClassification (eng)": 77.11, + "MasakhaNEWSClassification (fra)": 68.84, + "MasakhaNEWSClassification (hau)": 50.49, + "MasakhaNEWSClassification (ibo)": 52.15, + "MasakhaNEWSClassification (lin)": 68.29, + "MasakhaNEWSClassification (lug)": 47.58, + "MasakhaNEWSClassification (orm)": 50.68, + "MasakhaNEWSClassification (pcm)": 92.56, + "MasakhaNEWSClassification (run)": 54.81, + "MasakhaNEWSClassification (sna)": 65.58, + "MasakhaNEWSClassification (som)": 39.8, + "MasakhaNEWSClassification (swa)": 47.25, + "MasakhaNEWSClassification (tir)": 28.97, + "MasakhaNEWSClassification (xho)": 54.14, + "MasakhaNEWSClassification (yor)": 55.01, + "MassiveIntentClassification": 38.1, + "MassiveIntentClassification (mn)": 20.35, + "MassiveIntentClassification (en)": 66.94, + "MassiveIntentClassification (ta)": 11.31, + "MassiveIntentClassification (ru)": 27.58, + "MassiveIntentClassification (fi)": 38.37, + "MassiveIntentClassification (el)": 24.19, + "MassiveIntentClassification (hi)": 17.7, + "MassiveIntentClassification (fr)": 42.55, + "MassiveIntentClassification (pt)": 43.76, + "MassiveIntentClassification (sv)": 38.09, + "MassiveIntentClassification (ml)": 2.89, + "MassiveIntentClassification (zh-TW)": 22.56, + "MassiveIntentClassification (vi)": 37.09, + "MassiveIntentClassification (is)": 29.95, + "MassiveIntentClassification (id)": 39.02, + "MassiveIntentClassification (jv)": 35.91, + "MassiveIntentClassification (ka)": 9.07, + "MassiveIntentClassification (tr)": 33.76, + "MassiveIntentClassification (he)": 22.48, + "MassiveIntentClassification (lv)": 36.97, + "MassiveIntentClassification (bn)": 13.1, + "MassiveIntentClassification (af)": 37.45, + "MassiveIntentClassification (sw)": 34.98, + "MassiveIntentClassification (te)": 2.46, + "MassiveIntentClassification (sq)": 40.7, + "MassiveIntentClassification (fa)": 19.1, + "MassiveIntentClassification (az)": 30.63, + "MassiveIntentClassification (ar)": 19.05, + "MassiveIntentClassification (de)": 43.44, + "MassiveIntentClassification (th)": 11.26, + "MassiveIntentClassification (cy)": 34.54, + "MassiveIntentClassification (da)": 41.0, + "MassiveIntentClassification (ko)": 16.05, + "MassiveIntentClassification (kn)": 3.14, + "MassiveIntentClassification (my)": 4.24, + "MassiveIntentClassification (nb)": 39.36, + "MassiveIntentClassification (zh-CN)": 24.4, + "MassiveIntentClassification (ro)": 40.54, + "MassiveIntentClassification (ja)": 31.87, + "MassiveIntentClassification (nl)": 40.2, + "MassiveIntentClassification (it)": 41.59, + "MassiveIntentClassification (ur)": 14.42, + "MassiveIntentClassification (am)": 2.62, + "MassiveIntentClassification (hu)": 35.69, + "MassiveIntentClassification (ms)": 35.07, + "MassiveIntentClassification (pl)": 36.07, + "MassiveIntentClassification (hy)": 7.62, + "MassiveIntentClassification (tl)": 37.92, + "MassiveIntentClassification (sl)": 36.7, + "MassiveIntentClassification (km)": 4.91, + "MassiveIntentClassification (es)": 39.88, + "MassiveScenarioClassification": 42.93, + "MassiveScenarioClassification (ka)": 14.92, + "MassiveScenarioClassification (tr)": 38.85, + "MassiveScenarioClassification (es)": 49.0, + "MassiveScenarioClassification (it)": 49.8, + "MassiveScenarioClassification (ta)": 17.37, + "MassiveScenarioClassification (sl)": 41.9, + "MassiveScenarioClassification (ar)": 25.99, + "MassiveScenarioClassification (he)": 24.01, + "MassiveScenarioClassification (is)": 36.12, + "MassiveScenarioClassification (cy)": 39.0, + "MassiveScenarioClassification (nl)": 48.43, + "MassiveScenarioClassification (ja)": 37.3, + "MassiveScenarioClassification (ml)": 7.67, + "MassiveScenarioClassification (da)": 47.02, + "MassiveScenarioClassification (kn)": 7.85, + "MassiveScenarioClassification (my)": 10.61, + "MassiveScenarioClassification (ms)": 43.67, + "MassiveScenarioClassification (mn)": 25.47, + "MassiveScenarioClassification (el)": 31.3, + "MassiveScenarioClassification (hi)": 23.71, + "MassiveScenarioClassification (hy)": 13.03, + "MassiveScenarioClassification (ro)": 48.23, + "MassiveScenarioClassification (hu)": 41.61, + "MassiveScenarioClassification (id)": 43.46, + "MassiveScenarioClassification (jv)": 43.59, + "MassiveScenarioClassification (km)": 9.25, + "MassiveScenarioClassification (zh-TW)": 31.18, + "MassiveScenarioClassification (vi)": 40.47, + "MassiveScenarioClassification (af)": 43.87, + "MassiveScenarioClassification (fr)": 51.14, + "MassiveScenarioClassification (am)": 7.57, + "MassiveScenarioClassification (fa)": 23.97, + "MassiveScenarioClassification (ko)": 20.3, + "MassiveScenarioClassification (tl)": 45.69, + "MassiveScenarioClassification (lv)": 40.43, + "MassiveScenarioClassification (sq)": 47.21, + "MassiveScenarioClassification (th)": 19.5, + "MassiveScenarioClassification (az)": 35.59, + "MassiveScenarioClassification (sw)": 43.32, + "MassiveScenarioClassification (pl)": 43.82, + "MassiveScenarioClassification (te)": 7.95, + "MassiveScenarioClassification (ru)": 30.46, + "MassiveScenarioClassification (bn)": 20.56, + "MassiveScenarioClassification (fi)": 42.38, + "MassiveScenarioClassification (pt)": 50.72, + "MassiveScenarioClassification (en)": 73.81, + "MassiveScenarioClassification (sv)": 42.95, + "MassiveScenarioClassification (nb)": 44.67, + "MassiveScenarioClassification (zh-CN)": 33.65, + "MassiveScenarioClassification (ur)": 23.73, + "MassiveScenarioClassification (de)": 51.47, + "MultilingualSentiment": 41.28, + "NoRecClassification": 37.93, + "NordicLangClassification": 54.7, + "NorwegianParliament": 54.8, + "OnlineShopping": 57.74, + "PAC": 59.78, + "PolEmo2.0-IN": 40.29, + "PolEmo2.0-OUT": 25.0, + "RuReviewsClassification": 41.79, + "RuSciBenchGRNTIClassification": 10.08, + "RuSciBenchOECDClassification": 8.3, + "ScalaDaClassification": 50.03, + "ScalaNbClassification": 50.17, + "TNews": 20.12, + "ToxicConversationsClassification": 62.09, + "TweetSentimentExtractionClassification": 54.04, + "Waimai": 62.72 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-law-2", - "AlloProfClusteringP2P": 62.5, - "AlloProfClusteringS2S": 44.28, - "HALClusteringS2S": 26.36, - "MLSUMClusteringP2P (fr)": 44.03, - "MLSUMClusteringS2S (fr)": 42.95, - "MasakhaNEWSClusteringP2P (fra)": 50.68, - "MasakhaNEWSClusteringS2S (fra)": 38.79 + "Model": "all-MiniLM-L6-v2", + "AlloProfClusteringP2P": 51.83, + "AlloProfClusteringS2S": 32.07, + "ArxivClusteringP2P": 46.55, + "ArxivClusteringS2S": 37.86, + "BiorxivClusteringP2P": 38.37, + "BiorxivClusteringS2S": 32.88, + "GeoreviewClusteringP2P": 20.25, + "HALClusteringS2S": 18.84, + "MLSUMClusteringP2P": 36.74, + "MLSUMClusteringP2P (ru)": 23.91, + "MLSUMClusteringS2S": 28.12, + "MLSUMClusteringS2S (ru)": 19.07, + "MasakhaNEWSClusteringP2P": 34.92, + "MasakhaNEWSClusteringP2P (amh)": 43.85, + "MasakhaNEWSClusteringP2P (eng)": 48.88, + "MasakhaNEWSClusteringP2P (fra)": 34.92, + "MasakhaNEWSClusteringP2P (hau)": 24.77, + "MasakhaNEWSClusteringP2P (ibo)": 45.94, + "MasakhaNEWSClusteringP2P (lin)": 69.56, + "MasakhaNEWSClusteringP2P (lug)": 49.4, + "MasakhaNEWSClusteringP2P (orm)": 25.34, + "MasakhaNEWSClusteringP2P (pcm)": 85.57, + "MasakhaNEWSClusteringP2P (run)": 50.75, + "MasakhaNEWSClusteringP2P (sna)": 41.68, + "MasakhaNEWSClusteringP2P (som)": 29.02, + "MasakhaNEWSClusteringP2P (swa)": 21.87, + "MasakhaNEWSClusteringP2P (tir)": 42.93, + "MasakhaNEWSClusteringP2P (xho)": 28.58, + "MasakhaNEWSClusteringP2P (yor)": 31.45, + "MasakhaNEWSClusteringS2S": 40.58, + "MasakhaNEWSClusteringS2S (amh)": 45.44, + "MasakhaNEWSClusteringS2S (eng)": 41.09, + "MasakhaNEWSClusteringS2S (fra)": 40.58, + "MasakhaNEWSClusteringS2S (hau)": 15.42, + "MasakhaNEWSClusteringS2S (ibo)": 37.02, + "MasakhaNEWSClusteringS2S (lin)": 65.14, + "MasakhaNEWSClusteringS2S (lug)": 44.21, + "MasakhaNEWSClusteringS2S (orm)": 24.79, + "MasakhaNEWSClusteringS2S (pcm)": 61.48, + "MasakhaNEWSClusteringS2S (run)": 51.25, + "MasakhaNEWSClusteringS2S (sna)": 42.74, + "MasakhaNEWSClusteringS2S (som)": 30.08, + "MasakhaNEWSClusteringS2S (swa)": 9.55, + "MasakhaNEWSClusteringS2S (tir)": 46.04, + "MasakhaNEWSClusteringS2S (xho)": 27.08, + "MasakhaNEWSClusteringS2S (yor)": 31.04, + "MedrxivClusteringP2P": 34.39, + "MedrxivClusteringS2S": 31.86, + "RedditClustering": 50.7, + "RedditClusteringP2P": 54.8, + "RuSciBenchGRNTIClusteringP2P": 10.21, + "RuSciBenchOECDClusteringP2P": 9.44, + "StackExchangeClustering": 53.14, + "StackExchangeClusteringP2P": 34.26, + "TwentyNewsgroupsClustering": 46.49 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-law-2", - "OpusparcusPC (fr)": 93.06, - "PawsXPairClassification (fr)": 61.54 + "Model": "all-MiniLM-L6-v2", + "CDSC-E": 47.27, + "OpusparcusPC (de)": 89.91, + "OpusparcusPC (en)": 97.46, + "OpusparcusPC (fi)": 85.44, + "OpusparcusPC (fr)": 86.53, + "OpusparcusPC (ru)": 79.28, + "OpusparcusPC (sv)": 83.78, + "PSC": 81.87, + "PawsXPairClassification (de)": 51.22, + "PawsXPairClassification (en)": 59.1, + "PawsXPairClassification (es)": 52.21, + "PawsXPairClassification (fr)": 55.41, + "PawsXPairClassification (ja)": 48.97, + "PawsXPairClassification (ko)": 50.53, + "PawsXPairClassification (zh)": 53.11, + "SICK-E-PL": 47.32, + "SprintDuplicateQuestions": 94.55, + "TERRa": 45.03, + "TwitterSemEval2015": 67.86, + "TwitterURLCorpus": 84.7 }, { - "Model": "voyage-law-2", - "OpusparcusPC (fr)": 93.06, - "PawsXPairClassification (fr)": 61.54 + "Model": "all-MiniLM-L6-v2", + "CDSC-E": 47.27, + "OpusparcusPC": 86.53, + "OpusparcusPC (de)": 89.91, + "OpusparcusPC (en)": 97.46, + "OpusparcusPC (fi)": 85.44, + "OpusparcusPC (fr)": 86.53, + "OpusparcusPC (ru)": 79.28, + "OpusparcusPC (sv)": 83.78, + "PSC": 81.87, + "PawsXPairClassification": 55.51, + "PawsXPairClassification (de)": 51.55, + "PawsXPairClassification (en)": 59.1, + "PawsXPairClassification (es)": 52.23, + "PawsXPairClassification (fr)": 55.52, + "PawsXPairClassification (ja)": 49.19, + "PawsXPairClassification (ko)": 50.59, + "PawsXPairClassification (zh)": 53.18, + "SICK-E-PL": 47.39, + "SprintDuplicateQuestions": 94.55, + "TERRa": 45.03, + "TwitterSemEval2015": 67.86, + "TwitterURLCorpus": 84.7 + }, + { + "Model": "all-MiniLM-L6-v2", + "OpusparcusPC": 86.53, + "PawsXPairClassification": 55.4, + "SprintDuplicateQuestions": 94.55, + "TwitterSemEval2015": 67.86, + "TwitterURLCorpus": 84.7 } ] }, "Reranking": { "map": [ { - "Model": "voyage-law-2", - "AlloprofReranking": 72.92, - "SyntecReranking": 91.2 + "Model": "all-MiniLM-L6-v2", + "AlloprofReranking": 62.62, + "AskUbuntuDupQuestions": 63.48, + "MMarcoReranking": 4.74, + "MindSmallReranking": 30.8, + "RuBQReranking": 27.05, + "SciDocsRR": 87.12, + "StackOverflowDupQuestions": 50.77, + "SyntecReranking": 67.31, + "T2Reranking": 56.26 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-law-2", - "AILACasedocs": 44.56, - "AILAStatutes": 45.51, - "AlloprofRetrieval": 57.28, - "BSARDRetrieval": 11.83, - "GerDaLIRSmall": 44.91, - "LEMBNarrativeQARetrieval": 55.78, - "LEMBQMSumRetrieval": 57.26, - "LEMBSummScreenFDRetrieval": 98.72, - "LEMBWikimQARetrieval": 87.08, - "LeCaRDv2": 72.75, - "LegalBenchConsumerContractsQA": 83.27, - "LegalBenchCorporateLobbying": 95.66, - "LegalQuAD": 67.47, - "LegalSummarization": 68.96, - "MintakaRetrieval (fr)": 34.92, - "SyntecRetrieval": 87.33, - "XPQARetrieval (fr)": 73.56 - }, - { - "Model": "voyage-law-2", - "LEMBNeedleRetrieval": 80.5, - "LEMBPasskeyRetrieval": 93.75 + "Model": "all-MiniLM-L6-v2", + "AILACasedocs": 19.72, + "AILAStatutes": 20.52, + "ARCChallenge": 9.48, + "AlloprofRetrieval": 28.41, + "AlphaNLI": 28.19, + "AppsRetrieval": 6.6, + "ArguAna": 50.17, + "ArguAna-PL": 11.5, + "BSARDRetrieval": 4.8, + "CQADupstackRetrieval": 41.32, + "ClimateFEVER": 20.27, + "CmedqaRetrieval": 2.03, + "CodeFeedbackMT": 35.62, + "CodeFeedbackST": 65.99, + "CodeSearchNetCCRetrieval (python)": 71.29, + "CodeSearchNetCCRetrieval (javascript)": 66.75, + "CodeSearchNetCCRetrieval (go)": 60.0, + "CodeSearchNetCCRetrieval (ruby)": 69.32, + "CodeSearchNetCCRetrieval (java)": 66.32, + "CodeSearchNetCCRetrieval (php)": 58.42, + "CodeSearchNetRetrieval (python)": 79.28, + "CodeSearchNetRetrieval (javascript)": 63.62, + "CodeSearchNetRetrieval (go)": 79.84, + "CodeSearchNetRetrieval (ruby)": 72.84, + "CodeSearchNetRetrieval (java)": 51.65, + "CodeSearchNetRetrieval (php)": 70.82, + "CodeTransOceanContest": 61.78, + "CodeTransOceanDL": 27.94, + "CosQA": 33.02, + "CovidRetrieval": 0.8, + "DBPedia": 32.33, + "DuRetrieval": 3.03, + "EcomRetrieval": 3.7, + "FEVER": 51.93, + "FiQA-PL": 2.29, + "FiQA2018": 36.87, + "GerDaLIRSmall": 2.41, + "HellaSwag": 24.21, + "HotpotQA": 46.51, + "LEMBNarrativeQARetrieval": 18.27, + "LEMBQMSumRetrieval": 16.32, + "LEMBSummScreenFDRetrieval": 54.8, + "LEMBWikimQARetrieval": 46.23, + "LeCaRDv2": 17.5, + "LegalBenchConsumerContractsQA": 65.6, + "LegalBenchCorporateLobbying": 86.41, + "LegalQuAD": 11.81, + "LegalSummarization": 59.0, + "MIRACLRetrieval (ru)": 0.39, + "MMarcoRetrieval": 6.21, + "MSMARCO": 36.54, + "MedicalRetrieval": 1.76, + "MintakaRetrieval": 9.19, + "MintakaRetrieval (ar)": 2.22, + "MintakaRetrieval (de)": 15.43, + "MintakaRetrieval (es)": 7.72, + "MintakaRetrieval (fr)": 9.19, + "MintakaRetrieval (hi)": 2.65, + "MintakaRetrieval (it)": 8.48, + "MintakaRetrieval (ja)": 6.72, + "MintakaRetrieval (pt)": 9.76, + "NFCorpus": 31.59, + "NFCorpus-PL": 10.62, + "NQ": 43.87, + "PIQA": 25.28, + "Quail": 3.92, + "QuoraRetrieval": 87.56, + "RARbCode": 44.27, + "RARbMath": 68.19, + "RiaNewsRetrieval": 0.67, + "RuBQRetrieval": 2.64, + "SCIDOCS": 21.64, + "SCIDOCS-PL": 3.75, + "SIQA": 1.56, + "SciFact": 64.51, + "SciFact-PL": 16.14, + "SpartQA": 1.65, + "StackOverflowQA": 83.96, + "SyntecRetrieval": 60.15, + "SyntheticText2SQL": 44.35, + "T2Retrieval": 1.6, + "TRECCOVID": 47.23, + "TRECCOVID-PL": 8.66, + "TempReasonL1": 1.53, + "TempReasonL2Fact": 17.65, + "TempReasonL2Pure": 0.46, + "TempReasonL3Fact": 14.16, + "TempReasonL3Pure": 6.33, + "Touche2020": 16.9, + "VideoRetrieval": 9.79, + "WinoGrande": 47.35, + "XPQARetrieval": 51.79, + "XPQARetrieval (ara-ara)": 8.05, + "XPQARetrieval (eng-ara)": 1.9, + "XPQARetrieval (ara-eng)": 6.87, + "XPQARetrieval (deu-deu)": 53.25, + "XPQARetrieval (eng-deu)": 10.99, + "XPQARetrieval (deu-eng)": 27.59, + "XPQARetrieval (spa-spa)": 38.87, + "XPQARetrieval (eng-spa)": 5.46, + "XPQARetrieval (spa-eng)": 22.2, + "XPQARetrieval (fra-fra)": 51.79, + "XPQARetrieval (eng-fra)": 8.57, + "XPQARetrieval (fra-eng)": 31.36, + "XPQARetrieval (hin-hin)": 35.28, + "XPQARetrieval (eng-hin)": 6.28, + "XPQARetrieval (hin-eng)": 6.0, + "XPQARetrieval (ita-ita)": 54.57, + "XPQARetrieval (eng-ita)": 6.79, + "XPQARetrieval (ita-eng)": 24.13, + "XPQARetrieval (jpn-jpn)": 39.23, + "XPQARetrieval (eng-jpn)": 4.1, + "XPQARetrieval (jpn-eng)": 13.05, + "XPQARetrieval (kor-kor)": 10.2, + "XPQARetrieval (eng-kor)": 5.72, + "XPQARetrieval (kor-eng)": 6.37, + "XPQARetrieval (pol-pol)": 22.33, + "XPQARetrieval (eng-pol)": 7.58, + "XPQARetrieval (pol-eng)": 14.43, + "XPQARetrieval (por-por)": 31.93, + "XPQARetrieval (eng-por)": 5.9, + "XPQARetrieval (por-eng)": 20.74, + "XPQARetrieval (tam-tam)": 7.39, + "XPQARetrieval (eng-tam)": 3.42, + "XPQARetrieval (tam-eng)": 2.91, + "XPQARetrieval (cmn-cmn)": 19.41, + "XPQARetrieval (eng-cmn)": 5.05, + "XPQARetrieval (cmn-eng)": 8.77 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-law-2", - "SICKFr": 74.09, - "STS22 (fr)": 83.75, - "STSBenchmarkMultilingualSTS (fr)": 83.02 + "Model": "all-MiniLM-L6-v2", + "AFQMC": 8.59, + "ATEC": 13.52, + "BIOSSES": 81.64, + "BQ": 23.84, + "CDSC-R": 79.45, + "LCQMC": 23.85, + "PAWSX": 7.21, + "RUParaPhraserSTS": 43.93, + "RuSTSBenchmarkSTS": 55.56, + "SICK-R": 77.58, + "SICK-R-PL": 52.43, + "SICKFr": 62.48, + "STS12": 72.37, + "STS13": 80.6, + "STS14": 75.59, + "STS15": 85.39, + "STS16": 78.99, + "STS17 (es-es)": 76.12, + "STS17 (en-de)": 35.82, + "STS17 (it-en)": 24.45, + "STS17 (ar-ar)": 50.89, + "STS17 (fr-en)": 37.09, + "STS17 (en-tr)": 4.5, + "STS17 (en-en)": 87.59, + "STS17 (en-ar)": -4.28, + "STS17 (nl-en)": 29.0, + "STS17 (ko-ko)": 43.39, + "STS17 (es-en)": 16.31, + "STS22 (ar)": 22.64, + "STS22 (ru)": 14.72, + "STS22 (en)": 67.71, + "STS22 (tr)": 33.69, + "STS22 (pl)": 26.77, + "STS22 (es-en)": 53.42, + "STS22 (zh)": 44.93, + "STS22 (es)": 54.78, + "STS22 (de-fr)": 30.07, + "STS22 (de-pl)": -4.93, + "STS22 (fr)": 77.0, + "STS22 (de)": 31.04, + "STS22 (pl-en)": 32.8, + "STS22 (de-en)": 44.04, + "STS22 (zh-en)": 41.64, + "STS22 (es-it)": 44.27, + "STS22 (fr-pl)": 50.71, + "STS22 (it)": 60.4, + "STSB": 37.8, + "STSBenchmark": 82.03, + "STSBenchmarkMultilingualSTS (pl)": 56.42, + "STSBenchmarkMultilingualSTS (zh)": 39.74, + "STSBenchmarkMultilingualSTS (en)": 82.03, + "STSBenchmarkMultilingualSTS (nl)": 55.46, + "STSBenchmarkMultilingualSTS (it)": 59.24, + "STSBenchmarkMultilingualSTS (pt)": 61.56, + "STSBenchmarkMultilingualSTS (de)": 62.4, + "STSBenchmarkMultilingualSTS (fr)": 64.93, + "STSBenchmarkMultilingualSTS (ru)": 55.55, + "STSBenchmarkMultilingualSTS (es)": 61.62 + }, + { + "Model": "all-MiniLM-L6-v2", + "BIOSSES": 81.64, + "SICK-R": 77.58, + "SICKFr": 62.48, + "STS12": 72.37, + "STS13": 80.6, + "STS14": 75.59, + "STS15": 85.39, + "STS16": 78.99, + "STS17": 29.0, + "STS22": 41.64, + "STSBenchmark": 82.03, + "STSBenchmarkMultilingualSTS": 64.93 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "voyage-law-2", - "SummEvalFr": 30.34 + "Model": "all-MiniLM-L6-v2", + "SummEval": 30.81, + "SummEvalFr": 28.28 + }, + { + "Model": "all-MiniLM-L6-v2", + "SummEval": 30.81, + "SummEvalFr": 28.29 + }, + { + "Model": "all-MiniLM-L6-v2", + "SummEval": 30.81, + "SummEvalFr": 28.29 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "all-MiniLM-L6-v2", + "CEDRClassification": 32.72, + "SensitiveTopicsClassification": 17.82 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "all-MiniLM-L6-v2", + "Core17InstructionRetrieval": -0.35, + "News21InstructionRetrieval": -0.25, + "Robust04InstructionRetrieval": -7.93 + } + ] } }, - "e5-base-4k": { + "all-MiniLM-L6-v2-instruct": { "BitextMining": { "f1": [] }, @@ -4279,16 +5136,22 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "e5-base-4k", - "LEMBNarrativeQARetrieval": 30.35, - "LEMBQMSumRetrieval": 35.6, - "LEMBSummScreenFDRetrieval": 95.23, - "LEMBWikimQARetrieval": 69.19 - }, - { - "Model": "e5-base-4k", - "LEMBNeedleRetrieval": 41.5, - "LEMBPasskeyRetrieval": 67.25 + "Model": "all-MiniLM-L6-v2-instruct", + "ARCChallenge": 9.4, + "AlphaNLI": 15.09, + "HellaSwag": 20.51, + "PIQA": 24.68, + "Quail": 3.46, + "RARbCode": 42.47, + "RARbMath": 62.39, + "SIQA": 1.53, + "SpartQA": 0.57, + "TempReasonL1": 1.05, + "TempReasonL2Fact": 16.57, + "TempReasonL2Pure": 0.49, + "TempReasonL3Fact": 14.01, + "TempReasonL3Pure": 6.27, + "WinoGrande": 20.73 } ] }, @@ -4305,98 +5168,685 @@ "p-MRR": [] } }, - "Cohere-embed-english-v3.0": { + "all-mpnet-base-v2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "all-mpnet-base-v2", + "BornholmBitextMining": 27.44, + "Tatoeba (pol-eng)": 4.09, + "Tatoeba (ita-eng)": 11.1, + "Tatoeba (cat-eng)": 9.44, + "Tatoeba (aze-eng)": 1.49, + "Tatoeba (eus-eng)": 3.94, + "Tatoeba (epo-eng)": 7.15, + "Tatoeba (lit-eng)": 1.02, + "Tatoeba (ast-eng)": 9.78, + "Tatoeba (bul-eng)": 0.35, + "Tatoeba (ceb-eng)": 4.41, + "Tatoeba (mkd-eng)": 0.0, + "Tatoeba (tzl-eng)": 3.55, + "Tatoeba (zsm-eng)": 4.75, + "Tatoeba (mhr-eng)": 0.17, + "Tatoeba (pam-eng)": 4.32, + "Tatoeba (amh-eng)": 0.0, + "Tatoeba (slv-eng)": 3.73, + "Tatoeba (lvs-eng)": 2.98, + "Tatoeba (sqi-eng)": 3.45, + "Tatoeba (orv-eng)": 0.0, + "Tatoeba (vie-eng)": 4.96, + "Tatoeba (pes-eng)": 0.2, + "Tatoeba (por-eng)": 10.48, + "Tatoeba (dtp-eng)": 3.54, + "Tatoeba (yid-eng)": 0.08, + "Tatoeba (isl-eng)": 3.86, + "Tatoeba (cha-eng)": 12.2, + "Tatoeba (ron-eng)": 7.34, + "Tatoeba (hye-eng)": 0.14, + "Tatoeba (mar-eng)": 0.11, + "Tatoeba (hin-eng)": 0.02, + "Tatoeba (kor-eng)": 0.32, + "Tatoeba (srp-eng)": 1.89, + "Tatoeba (csb-eng)": 4.19, + "Tatoeba (jpn-eng)": 1.71, + "Tatoeba (ber-eng)": 4.56, + "Tatoeba (wuu-eng)": 0.91, + "Tatoeba (jav-eng)": 3.17, + "Tatoeba (nob-eng)": 4.37, + "Tatoeba (bre-eng)": 3.65, + "Tatoeba (kzj-eng)": 3.62, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (ces-eng)": 3.56, + "Tatoeba (cbk-eng)": 9.33, + "Tatoeba (gla-eng)": 2.04, + "Tatoeba (war-eng)": 5.14, + "Tatoeba (swh-eng)": 6.01, + "Tatoeba (swg-eng)": 7.86, + "Tatoeba (glg-eng)": 12.0, + "Tatoeba (fao-eng)": 7.08, + "Tatoeba (gsw-eng)": 10.67, + "Tatoeba (rus-eng)": 0.14, + "Tatoeba (kaz-eng)": 0.52, + "Tatoeba (gle-eng)": 2.19, + "Tatoeba (slk-eng)": 3.4, + "Tatoeba (nno-eng)": 5.75, + "Tatoeba (cor-eng)": 2.42, + "Tatoeba (nov-eng)": 16.61, + "Tatoeba (swe-eng)": 6.55, + "Tatoeba (max-eng)": 6.46, + "Tatoeba (oci-eng)": 8.57, + "Tatoeba (lfn-eng)": 6.1, + "Tatoeba (fra-eng)": 16.9, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (bel-eng)": 0.65, + "Tatoeba (lat-eng)": 5.78, + "Tatoeba (cmn-eng)": 2.22, + "Tatoeba (kat-eng)": 0.43, + "Tatoeba (bos-eng)": 4.6, + "Tatoeba (xho-eng)": 3.3, + "Tatoeba (tha-eng)": 0.0, + "Tatoeba (cym-eng)": 4.88, + "Tatoeba (deu-eng)": 11.46, + "Tatoeba (awa-eng)": 0.44, + "Tatoeba (ido-eng)": 9.84, + "Tatoeba (tat-eng)": 0.24, + "Tatoeba (kab-eng)": 1.31, + "Tatoeba (uzb-eng)": 1.98, + "Tatoeba (heb-eng)": 0.28, + "Tatoeba (ara-eng)": 0.1, + "Tatoeba (fry-eng)": 12.43, + "Tatoeba (afr-eng)": 6.08, + "Tatoeba (kur-eng)": 3.65, + "Tatoeba (pms-eng)": 7.63, + "Tatoeba (ell-eng)": 0.0, + "Tatoeba (spa-eng)": 10.12, + "Tatoeba (dsb-eng)": 2.96, + "Tatoeba (uig-eng)": 0.33, + "Tatoeba (nld-eng)": 9.29, + "Tatoeba (tel-eng)": 0.73, + "Tatoeba (hrv-eng)": 3.77, + "Tatoeba (nds-eng)": 10.96, + "Tatoeba (hun-eng)": 3.23, + "Tatoeba (est-eng)": 2.35, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (khm-eng)": 0.28, + "Tatoeba (hsb-eng)": 3.12, + "Tatoeba (tgl-eng)": 4.06, + "Tatoeba (ang-eng)": 9.77, + "Tatoeba (tur-eng)": 3.16, + "Tatoeba (tuk-eng)": 2.23, + "Tatoeba (ile-eng)": 17.84, + "Tatoeba (mon-eng)": 0.81, + "Tatoeba (yue-eng)": 1.16, + "Tatoeba (ina-eng)": 22.55, + "Tatoeba (tam-eng)": 0.73, + "Tatoeba (ukr-eng)": 0.5, + "Tatoeba (dan-eng)": 10.01, + "Tatoeba (arq-eng)": 0.33, + "Tatoeba (arz-eng)": 0.0, + "Tatoeba (fin-eng)": 3.82, + "Tatoeba (ind-eng)": 4.88 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "all-mpnet-base-v2", + "AllegroReviews": 22.99, + "AmazonCounterfactualClassification (en-ext)": 67.5, + "AmazonCounterfactualClassification (en)": 65.03, + "AmazonCounterfactualClassification (de)": 55.66, + "AmazonCounterfactualClassification (ja)": 60.69, + "AmazonPolarityClassification": 67.14, + "AmazonReviewsClassification (en)": 31.44, + "AmazonReviewsClassification (de)": 26.05, + "AmazonReviewsClassification (es)": 27.73, + "AmazonReviewsClassification (fr)": 28.49, + "AmazonReviewsClassification (ja)": 23.65, + "AmazonReviewsClassification (zh)": 23.62, + "AngryTweetsClassification": 44.13, + "Banking77Classification": 81.7, + "CBD": 50.25, + "DanishPoliticalCommentsClassification": 28.31, + "EmotionClassification": 42.22, + "GeoreviewClassification": 25.93, + "HeadlineClassification": 28.53, + "IFlyTek": 17.18, + "ImdbClassification": 71.17, + "InappropriatenessClassification": 51.82, + "JDReview": 60.19, + "KinopoiskClassification": 34.18, + "LccSentimentClassification": 39.27, + "MTOPDomainClassification (en)": 91.89, + "MTOPDomainClassification (de)": 71.86, + "MTOPDomainClassification (es)": 71.3, + "MTOPDomainClassification (fr)": 74.88, + "MTOPDomainClassification (hi)": 39.93, + "MTOPDomainClassification (th)": 17.54, + "MTOPIntentClassification (en)": 68.27, + "MTOPIntentClassification (de)": 44.36, + "MTOPIntentClassification (es)": 39.48, + "MTOPIntentClassification (fr)": 37.57, + "MTOPIntentClassification (hi)": 18.63, + "MTOPIntentClassification (th)": 5.39, + "MasakhaNEWSClassification (amh)": 36.49, + "MasakhaNEWSClassification (eng)": 79.75, + "MasakhaNEWSClassification (fra)": 77.77, + "MasakhaNEWSClassification (hau)": 59.22, + "MasakhaNEWSClassification (ibo)": 61.64, + "MasakhaNEWSClassification (lin)": 74.0, + "MasakhaNEWSClassification (lug)": 58.43, + "MasakhaNEWSClassification (orm)": 48.15, + "MasakhaNEWSClassification (pcm)": 92.2, + "MasakhaNEWSClassification (run)": 64.72, + "MasakhaNEWSClassification (sna)": 73.69, + "MasakhaNEWSClassification (som)": 49.97, + "MasakhaNEWSClassification (swa)": 55.15, + "MasakhaNEWSClassification (tir)": 27.46, + "MasakhaNEWSClassification (xho)": 60.98, + "MasakhaNEWSClassification (yor)": 63.33, + "MassiveIntentClassification": 69.57, + "MassiveIntentClassification (ar)": 20.42, + "MassiveIntentClassification (is)": 31.46, + "MassiveIntentClassification (ru)": 23.98, + "MassiveIntentClassification (hu)": 34.38, + "MassiveIntentClassification (pl)": 34.26, + "MassiveIntentClassification (lv)": 35.08, + "MassiveIntentClassification (ms)": 30.53, + "MassiveIntentClassification (id)": 36.31, + "MassiveIntentClassification (kn)": 3.76, + "MassiveIntentClassification (ta)": 9.25, + "MassiveIntentClassification (ro)": 38.07, + "MassiveIntentClassification (pt)": 42.83, + "MassiveIntentClassification (ja)": 33.13, + "MassiveIntentClassification (tl)": 36.33, + "MassiveIntentClassification (am)": 2.4, + "MassiveIntentClassification (fi)": 34.58, + "MassiveIntentClassification (hy)": 10.11, + "MassiveIntentClassification (nl)": 38.49, + "MassiveIntentClassification (tr)": 32.02, + "MassiveIntentClassification (en)": 69.76, + "MassiveIntentClassification (ur)": 12.86, + "MassiveIntentClassification (cy)": 30.82, + "MassiveIntentClassification (fr)": 44.27, + "MassiveIntentClassification (az)": 28.92, + "MassiveIntentClassification (bn)": 12.35, + "MassiveIntentClassification (mn)": 19.65, + "MassiveIntentClassification (it)": 40.29, + "MassiveIntentClassification (te)": 2.26, + "MassiveIntentClassification (ka)": 7.66, + "MassiveIntentClassification (hi)": 17.68, + "MassiveIntentClassification (fa)": 22.45, + "MassiveIntentClassification (sv)": 39.02, + "MassiveIntentClassification (he)": 23.6, + "MassiveIntentClassification (sq)": 37.26, + "MassiveIntentClassification (ml)": 2.62, + "MassiveIntentClassification (vi)": 31.47, + "MassiveIntentClassification (my)": 4.6, + "MassiveIntentClassification (jv)": 31.75, + "MassiveIntentClassification (zh-CN)": 24.36, + "MassiveIntentClassification (sw)": 31.82, + "MassiveIntentClassification (nb)": 39.3, + "MassiveIntentClassification (zh-TW)": 22.43, + "MassiveIntentClassification (el)": 24.52, + "MassiveIntentClassification (de)": 44.54, + "MassiveIntentClassification (th)": 8.51, + "MassiveIntentClassification (da)": 42.36, + "MassiveIntentClassification (af)": 36.49, + "MassiveIntentClassification (es)": 39.75, + "MassiveIntentClassification (ko)": 13.35, + "MassiveIntentClassification (sl)": 34.49, + "MassiveIntentClassification (km)": 4.76, + "MassiveScenarioClassification": 76.01, + "MassiveScenarioClassification (ko)": 17.28, + "MassiveScenarioClassification (sv)": 44.53, + "MassiveScenarioClassification (hy)": 16.86, + "MassiveScenarioClassification (nb)": 45.75, + "MassiveScenarioClassification (pl)": 42.66, + "MassiveScenarioClassification (id)": 43.05, + "MassiveScenarioClassification (it)": 51.37, + "MassiveScenarioClassification (tl)": 47.04, + "MassiveScenarioClassification (jv)": 40.0, + "MassiveScenarioClassification (lv)": 39.28, + "MassiveScenarioClassification (my)": 10.8, + "MassiveScenarioClassification (pt)": 52.06, + "MassiveScenarioClassification (te)": 7.81, + "MassiveScenarioClassification (de)": 54.09, + "MassiveScenarioClassification (fa)": 27.8, + "MassiveScenarioClassification (hi)": 23.13, + "MassiveScenarioClassification (hu)": 41.01, + "MassiveScenarioClassification (vi)": 35.9, + "MassiveScenarioClassification (fr)": 54.26, + "MassiveScenarioClassification (ja)": 40.57, + "MassiveScenarioClassification (th)": 17.01, + "MassiveScenarioClassification (sw)": 40.34, + "MassiveScenarioClassification (el)": 33.85, + "MassiveScenarioClassification (az)": 36.42, + "MassiveScenarioClassification (he)": 25.49, + "MassiveScenarioClassification (en)": 75.67, + "MassiveScenarioClassification (ka)": 13.45, + "MassiveScenarioClassification (af)": 43.63, + "MassiveScenarioClassification (bn)": 17.49, + "MassiveScenarioClassification (cy)": 34.82, + "MassiveScenarioClassification (mn)": 25.58, + "MassiveScenarioClassification (tr)": 39.11, + "MassiveScenarioClassification (ta)": 14.55, + "MassiveScenarioClassification (ar)": 27.8, + "MassiveScenarioClassification (ms)": 37.28, + "MassiveScenarioClassification (zh-TW)": 31.7, + "MassiveScenarioClassification (da)": 49.45, + "MassiveScenarioClassification (kn)": 8.34, + "MassiveScenarioClassification (ur)": 20.0, + "MassiveScenarioClassification (zh-CN)": 35.33, + "MassiveScenarioClassification (am)": 7.43, + "MassiveScenarioClassification (ro)": 47.86, + "MassiveScenarioClassification (fi)": 38.41, + "MassiveScenarioClassification (is)": 39.36, + "MassiveScenarioClassification (sq)": 44.67, + "MassiveScenarioClassification (es)": 50.92, + "MassiveScenarioClassification (ml)": 7.69, + "MassiveScenarioClassification (sl)": 39.88, + "MassiveScenarioClassification (nl)": 47.79, + "MassiveScenarioClassification (km)": 9.63, + "MassiveScenarioClassification (ru)": 28.71, + "MultilingualSentiment": 41.2, + "NoRecClassification": 38.34, + "NordicLangClassification": 50.15, + "OnlineShopping": 56.94, + "PAC": 62.1, + "PolEmo2.0-IN": 41.63, + "PolEmo2.0-OUT": 25.0, + "RuReviewsClassification": 42.33, + "RuSciBenchGRNTIClassification": 13.29, + "RuSciBenchOECDClassification": 10.62, + "TNews": 21.05, + "ToxicConversationsClassification": 61.05, + "TweetSentimentExtractionClassification": 55.05, + "Waimai": 63.31 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "all-mpnet-base-v2", + "ArxivClusteringP2P": 48.38, + "ArxivClusteringS2S": 39.72, + "BiorxivClusteringP2P": 39.62, + "BiorxivClusteringS2S": 35.02, + "GeoreviewClusteringP2P": 20.33, + "MasakhaNEWSClusteringP2P (amh)": 42.49, + "MasakhaNEWSClusteringP2P (eng)": 67.24, + "MasakhaNEWSClusteringP2P (fra)": 61.99, + "MasakhaNEWSClusteringP2P (hau)": 37.17, + "MasakhaNEWSClusteringP2P (ibo)": 52.0, + "MasakhaNEWSClusteringP2P (lin)": 69.68, + "MasakhaNEWSClusteringP2P (lug)": 50.96, + "MasakhaNEWSClusteringP2P (orm)": 28.42, + "MasakhaNEWSClusteringP2P (pcm)": 64.01, + "MasakhaNEWSClusteringP2P (run)": 57.6, + "MasakhaNEWSClusteringP2P (sna)": 54.99, + "MasakhaNEWSClusteringP2P (som)": 31.16, + "MasakhaNEWSClusteringP2P (swa)": 28.29, + "MasakhaNEWSClusteringP2P (tir)": 41.85, + "MasakhaNEWSClusteringP2P (xho)": 35.24, + "MasakhaNEWSClusteringP2P (yor)": 42.15, + "MasakhaNEWSClusteringS2S (amh)": 44.48, + "MasakhaNEWSClusteringS2S (eng)": 35.69, + "MasakhaNEWSClusteringS2S (fra)": 41.05, + "MasakhaNEWSClusteringS2S (hau)": 16.64, + "MasakhaNEWSClusteringS2S (ibo)": 38.63, + "MasakhaNEWSClusteringS2S (lin)": 70.72, + "MasakhaNEWSClusteringS2S (lug)": 46.97, + "MasakhaNEWSClusteringS2S (orm)": 23.85, + "MasakhaNEWSClusteringS2S (pcm)": 68.7, + "MasakhaNEWSClusteringS2S (run)": 52.27, + "MasakhaNEWSClusteringS2S (sna)": 47.64, + "MasakhaNEWSClusteringS2S (som)": 30.94, + "MasakhaNEWSClusteringS2S (swa)": 17.12, + "MasakhaNEWSClusteringS2S (tir)": 42.01, + "MasakhaNEWSClusteringS2S (xho)": 24.16, + "MasakhaNEWSClusteringS2S (yor)": 35.04, + "MedrxivClusteringP2P": 35.58, + "MedrxivClusteringS2S": 32.87, + "RedditClustering": 54.82, + "RedditClusteringP2P": 56.77, + "RuSciBenchGRNTIClusteringP2P": 14.66, + "RuSciBenchOECDClusteringP2P": 12.49, + "StackExchangeClustering": 53.8, + "StackExchangeClusteringP2P": 34.28, + "TwentyNewsgroupsClustering": 49.74 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "all-mpnet-base-v2", + "CDSC-E": 45.37, + "OpusparcusPC (de)": 89.78, + "OpusparcusPC (en)": 97.75, + "OpusparcusPC (fi)": 85.82, + "OpusparcusPC (fr)": 86.61, + "OpusparcusPC (ru)": 79.85, + "OpusparcusPC (sv)": 81.81, + "PSC": 83.28, + "PawsXPairClassification (de)": 52.17, + "PawsXPairClassification (en)": 61.99, + "PawsXPairClassification (es)": 55.06, + "PawsXPairClassification (fr)": 56.42, + "PawsXPairClassification (ja)": 47.43, + "PawsXPairClassification (ko)": 49.75, + "PawsXPairClassification (zh)": 52.47, + "SICK-E-PL": 46.51, + "SprintDuplicateQuestions": 90.15, + "TERRa": 44.52, + "TwitterSemEval2015": 73.85, + "TwitterURLCorpus": 85.11 + }, + { + "Model": "all-mpnet-base-v2", + "CDSC-E": 45.37, + "OpusparcusPC (de)": 89.78, + "OpusparcusPC (en)": 97.75, + "OpusparcusPC (fi)": 85.82, + "OpusparcusPC (fr)": 86.61, + "OpusparcusPC (ru)": 79.93, + "OpusparcusPC (sv)": 81.81, + "PSC": 83.28, + "PawsXPairClassification (de)": 52.28, + "PawsXPairClassification (en)": 61.99, + "PawsXPairClassification (es)": 55.06, + "PawsXPairClassification (fr)": 56.42, + "PawsXPairClassification (ja)": 47.6, + "PawsXPairClassification (ko)": 49.84, + "PawsXPairClassification (zh)": 52.51, + "SICK-E-PL": 46.55, + "SprintDuplicateQuestions": 90.15, + "TERRa": 44.52, + "TwitterSemEval2015": 73.87, + "TwitterURLCorpus": 85.11 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "all-mpnet-base-v2", + "AlloprofReranking": 69.63, + "AskUbuntuDupQuestions": 65.85, + "MMarcoReranking": 4.65, + "MindSmallReranking": 30.97, + "RuBQReranking": 30.96, + "SciDocsRR": 88.65, + "StackOverflowDupQuestions": 51.98, + "SyntecReranking": 66.12, + "T2Reranking": 58.3 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "Cohere-embed-english-v3.0", - "AILACasedocs": 31.54, - "AILAStatutes": 27.15, - "ARCChallenge": 9.89, - "AlphaNLI": 15.1, - "BrightRetrieval (psychology)": 21.82, - "BrightRetrieval (economics)": 20.18, - "BrightRetrieval (robotics)": 16.21, - "BrightRetrieval (biology)": 18.98, - "BrightRetrieval (stackoverflow)": 16.47, - "BrightRetrieval (theoremqa_theorems)": 7.14, - "BrightRetrieval (pony)": 1.77, - "BrightRetrieval (sustainable_living)": 17.69, - "BrightRetrieval (aops)": 6.46, - "BrightRetrieval (theoremqa_questions)": 15.07, - "BrightRetrieval (leetcode)": 26.78, - "BrightRetrieval (earth_science)": 27.45, - "GerDaLIRSmall": 6.05, - "HellaSwag": 26.35, - "LeCaRDv2": 21.02, - "LegalBenchConsumerContractsQA": 77.12, - "LegalBenchCorporateLobbying": 93.68, - "LegalQuAD": 26.08, - "LegalSummarization": 61.7, - "PIQA": 28.49, - "Quail": 4.1, - "RARbCode": 57.19, - "RARbMath": 72.26, - "SIQA": 4.26, - "SpartQA": 3.75, - "TempReasonL1": 1.5, - "TempReasonL2Fact": 35.91, - "TempReasonL2Pure": 1.89, - "TempReasonL3Fact": 27.51, - "TempReasonL3Pure": 8.53, - "WinoGrande": 58.01 + "Model": "all-mpnet-base-v2", + "AILACasedocs": 22.51, + "AILAStatutes": 21.27, + "ARCChallenge": 11.8, + "AlloprofRetrieval": 34.27, + "AlphaNLI": 22.41, + "AppsRetrieval": 8.41, + "ArguAna": 46.52, + "ArguAna-PL": 14.72, + "BSARDRetrieval": 6.98, + "BrightRetrieval (robotics)": 8.36, + "BrightRetrieval (psychology)": 22.63, + "BrightRetrieval (leetcode)": 26.4, + "BrightRetrieval (biology)": 15.52, + "BrightRetrieval (theoremqa_questions)": 18.49, + "BrightRetrieval (economics)": 16.64, + "BrightRetrieval (stackoverflow)": 9.48, + "BrightRetrieval (pony)": 6.95, + "BrightRetrieval (earth_science)": 20.11, + "BrightRetrieval (theoremqa_theorems)": 9.9, + "BrightRetrieval (sustainable_living)": 15.34, + "BrightRetrieval (aops)": 5.32, + "CQADupstackRetrieval": 44.96, + "ClimateFEVER": 21.97, + "CmedqaRetrieval": 2.0, + "CodeFeedbackMT": 37.72, + "CodeFeedbackST": 69.32, + "CodeSearchNetCCRetrieval (python)": 71.83, + "CodeSearchNetCCRetrieval (javascript)": 67.99, + "CodeSearchNetCCRetrieval (go)": 61.44, + "CodeSearchNetCCRetrieval (ruby)": 66.42, + "CodeSearchNetCCRetrieval (java)": 68.88, + "CodeSearchNetCCRetrieval (php)": 59.22, + "CodeSearchNetRetrieval (python)": 81.01, + "CodeSearchNetRetrieval (javascript)": 65.58, + "CodeSearchNetRetrieval (go)": 88.25, + "CodeSearchNetRetrieval (ruby)": 74.53, + "CodeSearchNetRetrieval (java)": 65.11, + "CodeSearchNetRetrieval (php)": 74.47, + "CodeTransOceanContest": 70.58, + "CodeTransOceanDL": 29.45, + "CosQA": 33.71, + "CovidRetrieval": 3.7, + "DBPedia": 32.09, + "DuRetrieval": 4.92, + "EcomRetrieval": 3.94, + "FEVER": 50.86, + "FiQA-PL": 3.6, + "FiQA2018": 49.96, + "GerDaLIRSmall": 3.78, + "HellaSwag": 26.27, + "HotpotQA": 39.29, + "LEMBNarrativeQARetrieval": 19.34, + "LEMBQMSumRetrieval": 21.54, + "LEMBSummScreenFDRetrieval": 60.43, + "LEMBWikimQARetrieval": 44.92, + "LeCaRDv2": 18.09, + "LegalBenchConsumerContractsQA": 75.25, + "LegalBenchCorporateLobbying": 89.04, + "LegalQuAD": 10.67, + "LegalSummarization": 58.55, + "MMarcoRetrieval": 7.13, + "MSMARCO": 39.75, + "MedicalRetrieval": 1.71, + "MintakaRetrieval (ar)": 1.97, + "MintakaRetrieval (de)": 17.21, + "MintakaRetrieval (es)": 10.11, + "MintakaRetrieval (fr)": 12.93, + "MintakaRetrieval (hi)": 2.03, + "MintakaRetrieval (it)": 5.63, + "MintakaRetrieval (ja)": 6.77, + "MintakaRetrieval (pt)": 8.05, + "NFCorpus": 33.29, + "NFCorpus-PL": 8.77, + "NQ": 50.45, + "PIQA": 29.03, + "Quail": 3.41, + "QuoraRetrieval": 87.46, + "RARbCode": 53.21, + "RARbMath": 71.85, + "RuBQRetrieval": 4.75, + "SCIDOCS": 23.76, + "SCIDOCS-PL": 4.02, + "SIQA": 2.38, + "SciFact": 65.57, + "SciFact-PL": 13.31, + "SpartQA": 0.22, + "StackOverflowQA": 90.32, + "SyntecRetrieval": 57.39, + "SyntheticText2SQL": 45.09, + "T2Retrieval": 2.98, + "TRECCOVID": 51.33, + "TRECCOVID-PL": 12.11, + "TempReasonL1": 1.77, + "TempReasonL2Fact": 11.2, + "TempReasonL2Pure": 1.15, + "TempReasonL3Fact": 9.42, + "TempReasonL3Pure": 5.59, + "Touche2020": 19.93, + "VideoRetrieval": 8.48, + "WinoGrande": 20.77, + "XPQARetrieval (ara-ara)": 9.42, + "XPQARetrieval (eng-ara)": 2.36, + "XPQARetrieval (ara-eng)": 8.98, + "XPQARetrieval (deu-deu)": 55.82, + "XPQARetrieval (eng-deu)": 11.74, + "XPQARetrieval (deu-eng)": 30.44, + "XPQARetrieval (spa-spa)": 40.01, + "XPQARetrieval (eng-spa)": 6.12, + "XPQARetrieval (spa-eng)": 29.44, + "XPQARetrieval (fra-fra)": 51.94, + "XPQARetrieval (eng-fra)": 11.48, + "XPQARetrieval (fra-eng)": 32.52, + "XPQARetrieval (hin-hin)": 37.45, + "XPQARetrieval (eng-hin)": 5.11, + "XPQARetrieval (hin-eng)": 7.37, + "XPQARetrieval (ita-ita)": 54.2, + "XPQARetrieval (eng-ita)": 6.08, + "XPQARetrieval (ita-eng)": 30.32, + "XPQARetrieval (jpn-jpn)": 37.46, + "XPQARetrieval (eng-jpn)": 5.79, + "XPQARetrieval (jpn-eng)": 14.77, + "XPQARetrieval (kor-kor)": 10.39, + "XPQARetrieval (eng-kor)": 7.09, + "XPQARetrieval (kor-eng)": 6.96, + "XPQARetrieval (pol-pol)": 23.71, + "XPQARetrieval (eng-pol)": 8.83, + "XPQARetrieval (pol-eng)": 15.94, + "XPQARetrieval (por-por)": 33.56, + "XPQARetrieval (eng-por)": 3.76, + "XPQARetrieval (por-eng)": 23.45, + "XPQARetrieval (tam-tam)": 5.5, + "XPQARetrieval (eng-tam)": 3.3, + "XPQARetrieval (tam-eng)": 4.18, + "XPQARetrieval (cmn-cmn)": 23.8, + "XPQARetrieval (eng-cmn)": 7.2, + "XPQARetrieval (cmn-eng)": 12.84 } ], "recall_at_1": [ { - "Model": "Cohere-embed-english-v3.0", - "BrightRetrieval (robotics)": 9.9, - "BrightRetrieval (psychology)": 20.5, - "BrightRetrieval (biology)": 31.47, - "BrightRetrieval (economics)": 17.96, - "BrightRetrieval (stackoverflow)": 15.81, - "BrightRetrieval (pony)": 0.84, - "BrightRetrieval (sustainable_living)": 15.23, - "BrightRetrieval (earth_science)": 35.49 + "Model": "all-mpnet-base-v2", + "BrightRetrieval (biology)": 25.57, + "BrightRetrieval (sustainable_living)": 18.01, + "BrightRetrieval (psychology)": 15.84, + "BrightRetrieval (economics)": 18.93, + "BrightRetrieval (earth_science)": 34.05, + "BrightRetrieval (stackoverflow)": 14.96, + "BrightRetrieval (pony)": 1.19, + "BrightRetrieval (robotics)": 10.89 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "all-mpnet-base-v2", + "AFQMC": 8.01, + "ATEC": 14.03, + "BIOSSES": 80.43, + "BQ": 21.39, + "CDSC-R": 77.04, + "LCQMC": 22.84, + "PAWSX": 6.44, + "RUParaPhraserSTS": 42.15, + "RuSTSBenchmarkSTS": 55.68, + "SICK-R": 80.59, + "SICK-R-PL": 50.2, + "SICKFr": 67.05, + "STS12": 72.63, + "STS13": 83.48, + "STS14": 78.0, + "STS15": 85.66, + "STS16": 80.03, + "STS17 (fr-en)": 41.64, + "STS17 (nl-en)": 32.89, + "STS17 (es-en)": 25.28, + "STS17 (en-en)": 90.6, + "STS17 (ko-ko)": 39.11, + "STS17 (ar-ar)": 55.42, + "STS17 (es-es)": 78.4, + "STS17 (en-de)": 35.5, + "STS17 (en-ar)": 6.76, + "STS17 (en-tr)": -4.58, + "STS17 (it-en)": 31.8, + "STS22 (pl)": 24.21, + "STS22 (it)": 58.02, + "STS22 (es-en)": 55.09, + "STS22 (fr)": 77.1, + "STS22 (tr)": 29.35, + "STS22 (zh)": 42.24, + "STS22 (de)": 27.0, + "STS22 (es-it)": 41.61, + "STS22 (fr-pl)": 73.25, + "STS22 (de-en)": 49.73, + "STS22 (zh-en)": 40.47, + "STS22 (es)": 55.98, + "STS22 (ar)": 38.96, + "STS22 (en)": 68.39, + "STS22 (de-pl)": 23.53, + "STS22 (ru)": 15.83, + "STS22 (pl-en)": 51.07, + "STS22 (de-fr)": 31.39, + "STSB": 37.7, + "STSBenchmark": 83.42, + "STSBenchmarkMultilingualSTS (nl)": 57.01, + "STSBenchmarkMultilingualSTS (ru)": 55.54, + "STSBenchmarkMultilingualSTS (fr)": 65.15, + "STSBenchmarkMultilingualSTS (it)": 62.72, + "STSBenchmarkMultilingualSTS (es)": 65.78, + "STSBenchmarkMultilingualSTS (en)": 83.42, + "STSBenchmarkMultilingualSTS (de)": 61.43, + "STSBenchmarkMultilingualSTS (pt)": 62.12, + "STSBenchmarkMultilingualSTS (zh)": 39.43, + "STSBenchmarkMultilingualSTS (pl)": 52.36 + }, + { + "Model": "all-mpnet-base-v2", + "STS17": 90.6, + "STS22": 67.95 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "all-mpnet-base-v2", + "SummEval": 27.49, + "SummEvalFr": 28.11 + }, + { + "Model": "all-mpnet-base-v2", + "SummEval": 27.49, + "SummEvalFr": 28.11 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "all-mpnet-base-v2", + "CEDRClassification": 35.98, + "SensitiveTopicsClassification": 17.83 + } + ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "Cohere-embed-english-v3.0", - "Core17InstructionRetrieval": 2.8, - "News21InstructionRetrieval": 0.2, - "Robust04InstructionRetrieval": -3.63 + "Model": "all-mpnet-base-v2", + "Core17InstructionRetrieval": -0.74, + "News21InstructionRetrieval": -1.79, + "Robust04InstructionRetrieval": -6.71 } ] } }, - "dragon-plus": { + "all-mpnet-base-v2-instruct": { "BitextMining": { "f1": [] }, @@ -4415,22 +5865,22 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "dragon-plus", - "ARCChallenge": 8.91, - "AlphaNLI": 32.1, - "HellaSwag": 27.69, - "PIQA": 28.01, - "Quail": 4.09, - "RARbCode": 17.58, - "RARbMath": 45.09, - "SIQA": 2.0, - "SpartQA": 10.34, - "TempReasonL1": 1.82, - "TempReasonL2Fact": 17.45, - "TempReasonL2Pure": 0.55, - "TempReasonL3Fact": 15.71, - "TempReasonL3Pure": 7.97, - "WinoGrande": 67.18 + "Model": "all-mpnet-base-v2-instruct", + "ARCChallenge": 10.35, + "AlphaNLI": 1.96, + "HellaSwag": 13.01, + "PIQA": 27.18, + "Quail": 3.02, + "RARbCode": 48.95, + "RARbMath": 69.21, + "SIQA": 1.29, + "SpartQA": 1.01, + "TempReasonL1": 1.52, + "TempReasonL2Fact": 7.28, + "TempReasonL2Pure": 1.03, + "TempReasonL3Fact": 7.03, + "TempReasonL3Pure": 5.16, + "WinoGrande": 9.66 } ] }, @@ -4447,118 +5897,118 @@ "p-MRR": [] } }, - "voyage-lite-01-instruct": { + "allenai-specter": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "voyage-lite-01-instruct", - "AmazonCounterfactualClassification (en)": 71.43, - "AmazonPolarityClassification": 96.41, - "AmazonReviewsClassification (en)": 57.06, - "Banking77Classification": 81.64, - "EmotionClassification": 48.29, - "ImdbClassification": 95.49, - "MTOPDomainClassification (en)": 96.3, - "MTOPIntentClassification (en)": 67.93, - "MassiveIntentClassification (en)": 71.29, - "MassiveScenarioClassification (en)": 76.74, - "ToxicConversationsClassification": 75.45, - "TweetSentimentExtractionClassification": 59.44 + "Model": "allenai-specter", + "AmazonCounterfactualClassification": 43.87, + "AmazonPolarityClassification": 57.77, + "AmazonReviewsClassification": 20.49, + "Banking77Classification": 66.66, + "EmotionClassification": 24.82, + "ImdbClassification": 56.35, + "MTOPDomainClassification": 14.98, + "MTOPIntentClassification": 4.67, + "MassiveIntentClassification": 4.79, + "MassiveScenarioClassification": 10.19, + "ToxicConversationsClassification": 57.44, + "TweetSentimentExtractionClassification": 45.52 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-lite-01-instruct", - "ArxivClusteringP2P": 47.92, - "ArxivClusteringS2S": 42.42, - "BiorxivClusteringP2P": 38.72, - "BiorxivClusteringS2S": 36.6, - "MedrxivClusteringP2P": 34.04, - "MedrxivClusteringS2S": 32.81, - "RedditClustering": 61.56, - "RedditClusteringP2P": 65.35, - "StackExchangeClustering": 70.16, - "StackExchangeClusteringP2P": 38.23, - "TwentyNewsgroupsClustering": 53.56 + "Model": "allenai-specter", + "ArxivClusteringP2P": 44.75, + "ArxivClusteringS2S": 35.27, + "BiorxivClusteringP2P": 39.52, + "BiorxivClusteringS2S": 34.53, + "MedrxivClusteringP2P": 35.04, + "MedrxivClusteringS2S": 31.66, + "RedditClustering": 24.13, + "RedditClusteringP2P": 35.06, + "StackExchangeClustering": 39.01, + "StackExchangeClusteringP2P": 31.46, + "TwentyNewsgroupsClustering": 24.22 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-lite-01-instruct", - "SprintDuplicateQuestions": 96.01, - "TwitterSemEval2015": 76.87, - "TwitterURLCorpus": 86.84 + "Model": "allenai-specter", + "SprintDuplicateQuestions": 71.63, + "TwitterSemEval2015": 43.25, + "TwitterURLCorpus": 69.22 }, { - "Model": "voyage-lite-01-instruct", - "SprintDuplicateQuestions": 96.01, - "TwitterSemEval2015": 76.87, - "TwitterURLCorpus": 86.84 + "Model": "allenai-specter", + "SprintDuplicateQuestions": 71.63, + "TwitterSemEval2015": 43.25, + "TwitterURLCorpus": 69.22 } ] }, "Reranking": { "map": [ { - "Model": "voyage-lite-01-instruct", - "AskUbuntuDupQuestions": 65.77, - "MindSmallReranking": 31.69, - "SciDocsRR": 87.03, - "StackOverflowDupQuestions": 54.49 + "Model": "allenai-specter", + "AskUbuntuDupQuestions": 50.07, + "MindSmallReranking": 24.8, + "SciDocsRR": 81.31, + "StackOverflowDupQuestions": 36.22 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-lite-01-instruct", - "ArguAna": 58.73, - "CQADupstackRetrieval": 45.11, - "ClimateFEVER": 37.47, - "DBPedia": 43.42, - "FEVER": 89.71, - "FiQA2018": 44.79, - "HotpotQA": 70.46, - "MSMARCO": 39.66, - "NFCorpus": 43.33, - "NQ": 60.65, - "QuoraRetrieval": 87.83, - "SCIDOCS": 23.19, - "SciFact": 73.64, - "TRECCOVID": 78.92, - "Touche2020": 36.83 + "Model": "allenai-specter", + "ArguAna": 32.67, + "CQADupstackRetrieval": 14.6, + "ClimateFEVER": 6.86, + "DBPedia": 4.14, + "FEVER": 5.45, + "FiQA2018": 5.64, + "HotpotQA": 5.46, + "MSMARCO": 5.59, + "NFCorpus": 0.85, + "NQ": 5.99, + "QuoraRetrieval": 64.65, + "SCIDOCS": 0.0, + "SciFact": 47.88, + "TRECCOVID": 29.91, + "Touche2020": 8.46 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-lite-01-instruct", - "BIOSSES": 84.85, - "SICK-R": 79.71, - "STS12": 77.09, - "STS13": 88.91, - "STS14": 82.08, - "STS15": 89.21, - "STS16": 84.74, - "STS17 (en-en)": 90.73, - "STS22 (en)": 62.1, - "STSBenchmark": 89.86 + "Model": "allenai-specter", + "BIOSSES": 64.95, + "SICK-R": 56.39, + "STS12": 62.49, + "STS13": 58.7, + "STS14": 54.87, + "STS15": 62.54, + "STS16": 64.27, + "STS17": 24.92, + "STS22": 8.44, + "STSBenchmark": 61.26 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "voyage-lite-01-instruct", - "SummEval": 30.97 + "Model": "allenai-specter", + "SummEval": 27.66 } ] }, @@ -4569,238 +6019,178 @@ "p-MRR": [] } }, - "bm25": { + "bert-base-10lang-cased": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bert-base-10lang-cased", + "AmazonReviewsClassification": 29.38, + "MTOPDomainClassification": 63.65, + "MTOPIntentClassification": 37.87, + "MasakhaNEWSClassification": 63.93, + "MassiveIntentClassification": 37.28, + "MassiveScenarioClassification": 44.5 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "bert-base-10lang-cased", + "AlloProfClusteringP2P": 53.22, + "AlloProfClusteringS2S": 42.92, + "HALClusteringS2S": 19.94, + "MLSUMClusteringP2P": 40.96, + "MLSUMClusteringS2S": 31.87, + "MasakhaNEWSClusteringP2P": 24.23, + "MasakhaNEWSClusteringS2S": 24.46 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "bert-base-10lang-cased", + "OpusparcusPC": 86.79, + "PawsXPairClassification": 53.4 + }, + { + "Model": "bert-base-10lang-cased", + "OpusparcusPC": 87.78, + "PawsXPairClassification": 53.4 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "bert-base-10lang-cased", + "AlloprofReranking": 36.21, + "SyntecReranking": 53.25 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bm25", - "BrightRetrieval (robotics)": 13.53, - "BrightRetrieval (pony)": 7.93, - "BrightRetrieval (leetcode)": 24.37, - "BrightRetrieval (earth_science)": 27.06, - "BrightRetrieval (stackoverflow)": 16.55, - "BrightRetrieval (economics)": 14.87, - "BrightRetrieval (theoremqa_questions)": 9.78, - "BrightRetrieval (theoremqa_theorems)": 4.75, - "BrightRetrieval (psychology)": 12.51, - "BrightRetrieval (sustainable_living)": 15.22, - "BrightRetrieval (biology)": 19.19, - "BrightRetrieval (aops)": 6.2 - } - ], - "recall_at_1": [ - { - "Model": "bm25", - "BrightRetrieval (robotics)": 7.43, - "BrightRetrieval (pony)": 5.35, - "BrightRetrieval (biology)": 10.68, - "BrightRetrieval (stackoverflow)": 22.22, - "BrightRetrieval (earth_science)": 15.37, - "BrightRetrieval (psychology)": 8.42, - "BrightRetrieval (sustainable_living)": 10.68, - "BrightRetrieval (economics)": 10.68 + "Model": "bert-base-10lang-cased", + "AlloprofRetrieval": 1.6, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 3.55, + "SyntecRetrieval": 18.95, + "XPQARetrieval": 18.39 } ] }, "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ + "cosine_spearman": [ { - "Model": "bm25", - "Core17InstructionRetrieval": -1.06, - "News21InstructionRetrieval": -2.15, - "Robust04InstructionRetrieval": -3.06 + "Model": "bert-base-10lang-cased", + "SICKFr": 58.76, + "STS22": 40.31, + "STSBenchmarkMultilingualSTS": 52.25 } ] - } - }, - "e5-large-v2": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] }, - "Clustering": { - "v_measure": [ + "Summarization": { + "cosine_spearman": [ { - "Model": "e5-large-v2", - "BiorxivClusteringP2P": 36.72, - "BiorxivClusteringS2S": 35.47, - "MedrxivClusteringP2P": 31.45, - "MedrxivClusteringS2S": 29.91, - "RedditClustering": 55.5, - "RedditClusteringP2P": 63.71, - "StackExchangeClustering": 65.23, - "StackExchangeClusteringP2P": 33.62, - "TwentyNewsgroupsClustering": 48.73 + "Model": "bert-base-10lang-cased", + "SummEvalFr": 29.06 } ] }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "e5-large-v2", - "Core17InstructionRetrieval": 0.12, - "News21InstructionRetrieval": 0.87, - "Robust04InstructionRetrieval": -4.16 - } - ] + "p-MRR": [] } }, - "google-gecko-256.text-embedding-preview-0409": { + "bert-base-15lang-cased": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "AmazonCounterfactualClassification (en)": 70.93, - "AmazonPolarityClassification": 97.34, - "AmazonReviewsClassification (en)": 48.47, - "Banking77Classification": 86.01, - "EmotionClassification": 51.53, - "ImdbClassification": 95.7, - "MTOPDomainClassification (en)": 98.02, - "MTOPIntentClassification (en)": 77.82, - "MassiveIntentClassification (en)": 75.67, - "MassiveScenarioClassification (en)": 85.16, - "ToxicConversationsClassification": 88.33, - "TweetSentimentExtractionClassification": 72.97 + "Model": "bert-base-15lang-cased", + "AmazonReviewsClassification": 29.35, + "MTOPDomainClassification": 63.7, + "MTOPIntentClassification": 37.85, + "MasakhaNEWSClassification": 63.89, + "MassiveIntentClassification": 37.28, + "MassiveScenarioClassification": 44.47 } ] }, "Clustering": { "v_measure": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "ArxivClusteringP2P": 44.12, - "ArxivClusteringS2S": 36.54, - "BiorxivClusteringP2P": 36.28, - "BiorxivClusteringS2S": 33.09, - "MedrxivClusteringP2P": 32.08, - "MedrxivClusteringS2S": 30.84, - "RedditClustering": 62.24, - "RedditClusteringP2P": 63.7, - "StackExchangeClustering": 70.19, - "StackExchangeClusteringP2P": 36.1, - "TwentyNewsgroupsClustering": 50.6 + "Model": "bert-base-15lang-cased", + "AlloProfClusteringP2P": 53.16, + "AlloProfClusteringS2S": 43.43, + "HALClusteringS2S": 20.26, + "MLSUMClusteringP2P": 41.22, + "MLSUMClusteringS2S": 31.88, + "MasakhaNEWSClusteringP2P": 24.23, + "MasakhaNEWSClusteringS2S": 24.46 } ] }, "PairClassification": { "max_ap": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "SprintDuplicateQuestions": 96.49, - "TwitterSemEval2015": 78.23, - "TwitterURLCorpus": 87.04 + "Model": "bert-base-15lang-cased", + "OpusparcusPC": 86.78, + "PawsXPairClassification": 53.38 }, { - "Model": "google-gecko-256.text-embedding-preview-0409", - "SprintDuplicateQuestions": 96.49, - "TwitterSemEval2015": 78.25, - "TwitterURLCorpus": 87.12 + "Model": "bert-base-15lang-cased", + "OpusparcusPC": 87.73, + "PawsXPairClassification": 53.38 } ] }, "Reranking": { "map": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "AskUbuntuDupQuestions": 63.84, - "MindSmallReranking": 31.89, - "SciDocsRR": 81.62, - "StackOverflowDupQuestions": 53.76 + "Model": "bert-base-15lang-cased", + "AlloprofReranking": 36.21, + "SyntecReranking": 53.25 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "ArguAna": 56.27, - "CQADupstackRetrieval": 45.41, - "ClimateFEVER": 29.35, - "DBPedia": 41.91, - "FEVER": 82.61, - "FiQA2018": 55.54, - "HotpotQA": 64.65, - "MSMARCO": 31.12, - "NFCorpus": 37.81, - "NQ": 57.37, - "QuoraRetrieval": 87.89, - "SCIDOCS": 18.21, - "SciFact": 70.86, - "TRECCOVID": 80.13, - "Touche2020": 27.4 + "Model": "bert-base-15lang-cased", + "AlloprofRetrieval": 1.61, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 3.55, + "SyntecRetrieval": 18.95, + "XPQARetrieval": 18.35 } ] }, "STS": { "cosine_spearman": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "BIOSSES": 89.42, - "SICK-R": 81.67, - "STS12": 78.02, - "STS13": 90.1, - "STS14": 85.44, - "STS15": 89.64, - "STS16": 87.24, - "STS17 (en-en)": 90.46, - "STS22 (en)": 67.99, - "STSBenchmark": 89.33 + "Model": "bert-base-15lang-cased", + "SICKFr": 58.77, + "STS22": 40.4, + "STSBenchmarkMultilingualSTS": 52.25 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "google-gecko-256.text-embedding-preview-0409", - "SummEval": 32.36 + "Model": "bert-base-15lang-cased", + "SummEvalFr": 29.13 } ] }, @@ -4811,972 +6201,180 @@ "p-MRR": [] } }, - "paraphrase-multilingual-MiniLM-L12-v2": { + "bert-base-25lang-cased": { "BitextMining": { - "f1": [ - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "BUCC (de-en)": 97.11, - "BUCC (fr-en)": 94.99, - "BUCC (ru-en)": 95.06, - "BUCC (zh-en)": 95.63, - "BornholmBitextMining (dan-Latn)": 19.67, - "Tatoeba (est-Latn_eng-Latn)": 97.33, - "Tatoeba (aze-Latn_eng-Latn)": 62.1, - "Tatoeba (oci-Latn_eng-Latn)": 38.57, - "Tatoeba (eus-Latn_eng-Latn)": 23.18, - "Tatoeba (sqi-Latn_eng-Latn)": 98.17, - "Tatoeba (yue-Hant_eng-Latn)": 71.45, - "Tatoeba (ara-Arab_eng-Latn)": 87.93, - "Tatoeba (wuu-Hans_eng-Latn)": 76.0, - "Tatoeba (lvs-Latn_eng-Latn)": 97.87, - "Tatoeba (ron-Latn_eng-Latn)": 95.3, - "Tatoeba (uzb-Latn_eng-Latn)": 17.14, - "Tatoeba (ell-Grek_eng-Latn)": 95.43, - "Tatoeba (lit-Latn_eng-Latn)": 93.16, - "Tatoeba (isl-Latn_eng-Latn)": 24.07, - "Tatoeba (awa-Deva_eng-Latn)": 33.43, - "Tatoeba (tuk-Latn_eng-Latn)": 15.16, - "Tatoeba (nld-Latn_eng-Latn)": 94.58, - "Tatoeba (ido-Latn_eng-Latn)": 40.25, - "Tatoeba (afr-Latn_eng-Latn)": 58.22, - "Tatoeba (cmn-Hans_eng-Latn)": 94.93, - "Tatoeba (max-Deva_eng-Latn)": 45.25, - "Tatoeba (tgl-Latn_eng-Latn)": 13.09, - "Tatoeba (ind-Latn_eng-Latn)": 92.74, - "Tatoeba (mkd-Cyrl_eng-Latn)": 91.0, - "Tatoeba (bel-Cyrl_eng-Latn)": 67.73, - "Tatoeba (hrv-Latn_eng-Latn)": 95.98, - "Tatoeba (bre-Latn_eng-Latn)": 5.56, - "Tatoeba (bul-Cyrl_eng-Latn)": 92.65, - "Tatoeba (hun-Latn_eng-Latn)": 91.58, - "Tatoeba (srp-Cyrl_eng-Latn)": 92.24, - "Tatoeba (pol-Latn_eng-Latn)": 94.28, - "Tatoeba (mhr-Cyrl_eng-Latn)": 6.89, - "Tatoeba (fra-Latn_eng-Latn)": 91.72, - "Tatoeba (lfn-Latn_eng-Latn)": 47.02, - "Tatoeba (cha-Latn_eng-Latn)": 15.98, - "Tatoeba (hsb-Latn_eng-Latn)": 36.1, - "Tatoeba (rus-Cyrl_eng-Latn)": 91.87, - "Tatoeba (khm-Khmr_eng-Latn)": 32.11, - "Tatoeba (arq-Arab_eng-Latn)": 18.6, - "Tatoeba (gla-Latn_eng-Latn)": 3.61, - "Tatoeba (ces-Latn_eng-Latn)": 95.12, - "Tatoeba (ita-Latn_eng-Latn)": 93.05, - "Tatoeba (csb-Latn_eng-Latn)": 21.56, - "Tatoeba (kor-Hang_eng-Latn)": 92.52, - "Tatoeba (ile-Latn_eng-Latn)": 57.71, - "Tatoeba (xho-Latn_eng-Latn)": 4.52, - "Tatoeba (deu-Latn_eng-Latn)": 97.02, - "Tatoeba (heb-Hebr_eng-Latn)": 86.88, - "Tatoeba (yid-Hebr_eng-Latn)": 14.38, - "Tatoeba (cym-Latn_eng-Latn)": 13.25, - "Tatoeba (jpn-Jpan_eng-Latn)": 90.41, - "Tatoeba (tzl-Latn_eng-Latn)": 25.46, - "Tatoeba (ast-Latn_eng-Latn)": 62.17, - "Tatoeba (war-Latn_eng-Latn)": 7.25, - "Tatoeba (tha-Thai_eng-Latn)": 96.72, - "Tatoeba (fry-Latn_eng-Latn)": 31.13, - "Tatoeba (dtp-Latn_eng-Latn)": 5.69, - "Tatoeba (hye-Armn_eng-Latn)": 93.28, - "Tatoeba (mar-Deva_eng-Latn)": 92.38, - "Tatoeba (cbk-Latn_eng-Latn)": 55.37, - "Tatoeba (uig-Arab_eng-Latn)": 24.39, - "Tatoeba (cor-Latn_eng-Latn)": 3.42, - "Tatoeba (nov-Latn_eng-Latn)": 47.99, - "Tatoeba (slv-Latn_eng-Latn)": 96.92, - "Tatoeba (kzj-Latn_eng-Latn)": 6.24, - "Tatoeba (dan-Latn_eng-Latn)": 94.8, - "Tatoeba (hin-Deva_eng-Latn)": 97.62, - "Tatoeba (ukr-Cyrl_eng-Latn)": 92.82, - "Tatoeba (jav-Latn_eng-Latn)": 17.04, - "Tatoeba (swe-Latn_eng-Latn)": 94.42, - "Tatoeba (swg-Latn_eng-Latn)": 26.31, - "Tatoeba (cat-Latn_eng-Latn)": 94.42, - "Tatoeba (ina-Latn_eng-Latn)": 79.13, - "Tatoeba (mal-Mlym_eng-Latn)": 32.2, - "Tatoeba (gle-Latn_eng-Latn)": 11.62, - "Tatoeba (tel-Telu_eng-Latn)": 36.4, - "Tatoeba (zsm-Latn_eng-Latn)": 95.31, - "Tatoeba (swh-Latn_eng-Latn)": 14.48, - "Tatoeba (tam-Taml_eng-Latn)": 24.64, - "Tatoeba (epo-Latn_eng-Latn)": 41.73, - "Tatoeba (pms-Latn_eng-Latn)": 30.7, - "Tatoeba (mon-Cyrl_eng-Latn)": 95.04, - "Tatoeba (kat-Geor_eng-Latn)": 95.44, - "Tatoeba (urd-Arab_eng-Latn)": 94.57, - "Tatoeba (vie-Latn_eng-Latn)": 95.12, - "Tatoeba (fin-Latn_eng-Latn)": 93.1, - "Tatoeba (tur-Latn_eng-Latn)": 95.08, - "Tatoeba (arz-Arab_eng-Latn)": 51.26, - "Tatoeba (fao-Latn_eng-Latn)": 27.51, - "Tatoeba (pes-Arab_eng-Latn)": 92.59, - "Tatoeba (tat-Cyrl_eng-Latn)": 10.25, - "Tatoeba (nds-Latn_eng-Latn)": 32.16, - "Tatoeba (nno-Latn_eng-Latn)": 76.34, - "Tatoeba (ber-Tfng_eng-Latn)": 4.43, - "Tatoeba (bos-Latn_eng-Latn)": 93.27, - "Tatoeba (slk-Latn_eng-Latn)": 95.15, - "Tatoeba (spa-Latn_eng-Latn)": 95.42, - "Tatoeba (pam-Latn_eng-Latn)": 5.41, - "Tatoeba (ben-Beng_eng-Latn)": 36.48, - "Tatoeba (ang-Latn_eng-Latn)": 10.24, - "Tatoeba (kur-Latn_eng-Latn)": 46.94, - "Tatoeba (por-Latn_eng-Latn)": 92.13, - "Tatoeba (orv-Cyrl_eng-Latn)": 15.1, - "Tatoeba (dsb-Latn_eng-Latn)": 33.43, - "Tatoeba (amh-Ethi_eng-Latn)": 36.21, - "Tatoeba (kab-Latn_eng-Latn)": 1.16, - "Tatoeba (kaz-Cyrl_eng-Latn)": 34.89, - "Tatoeba (nob-Latn_eng-Latn)": 97.73, - "Tatoeba (ceb-Latn_eng-Latn)": 8.05, - "Tatoeba (lat-Latn_eng-Latn)": 19.47, - "Tatoeba (glg-Latn_eng-Latn)": 94.0, - "Tatoeba (gsw-Latn_eng-Latn)": 25.74, - "Tatoeba (afr-eng)": 58.22, - "Tatoeba (amh-eng)": 36.21, - "Tatoeba (ang-eng)": 10.24, - "Tatoeba (ara-eng)": 87.93, - "Tatoeba (arq-eng)": 18.6, - "Tatoeba (arz-eng)": 51.26, - "Tatoeba (ast-eng)": 62.17, - "Tatoeba (awa-eng)": 33.43, - "Tatoeba (aze-eng)": 62.1, - "Tatoeba (bel-eng)": 67.73, - "Tatoeba (ben-eng)": 36.48, - "Tatoeba (ber-eng)": 4.43, - "Tatoeba (bos-eng)": 93.27, - "Tatoeba (bre-eng)": 5.56, - "Tatoeba (bul-eng)": 92.65, - "Tatoeba (cat-eng)": 94.42, - "Tatoeba (cbk-eng)": 55.37, - "Tatoeba (ceb-eng)": 8.05, - "Tatoeba (ces-eng)": 95.12, - "Tatoeba (cha-eng)": 15.98, - "Tatoeba (cmn-eng)": 94.93, - "Tatoeba (cor-eng)": 3.42, - "Tatoeba (csb-eng)": 21.56, - "Tatoeba (cym-eng)": 13.25, - "Tatoeba (dan-eng)": 94.8, - "Tatoeba (deu-eng)": 97.02, - "Tatoeba (dsb-eng)": 33.43, - "Tatoeba (dtp-eng)": 5.69, - "Tatoeba (ell-eng)": 95.43, - "Tatoeba (epo-eng)": 41.73, - "Tatoeba (est-eng)": 97.33, - "Tatoeba (eus-eng)": 23.18, - "Tatoeba (fao-eng)": 27.51, - "Tatoeba (fin-eng)": 93.1, - "Tatoeba (fra-eng)": 91.72, - "Tatoeba (fry-eng)": 31.13, - "Tatoeba (gla-eng)": 3.61, - "Tatoeba (gle-eng)": 11.62, - "Tatoeba (glg-eng)": 94.0, - "Tatoeba (gsw-eng)": 25.74, - "Tatoeba (heb-eng)": 86.88, - "Tatoeba (hin-eng)": 97.62, - "Tatoeba (hrv-eng)": 95.98, - "Tatoeba (hsb-eng)": 36.1, - "Tatoeba (hun-eng)": 91.58, - "Tatoeba (hye-eng)": 93.28, - "Tatoeba (ido-eng)": 40.25, - "Tatoeba (ile-eng)": 57.71, - "Tatoeba (ina-eng)": 79.13, - "Tatoeba (ind-eng)": 92.74, - "Tatoeba (isl-eng)": 24.07, - "Tatoeba (ita-eng)": 93.05, - "Tatoeba (jav-eng)": 17.04, - "Tatoeba (jpn-eng)": 90.41, - "Tatoeba (kab-eng)": 1.16, - "Tatoeba (kat-eng)": 95.44, - "Tatoeba (kaz-eng)": 34.89, - "Tatoeba (khm-eng)": 32.11, - "Tatoeba (kor-eng)": 92.52, - "Tatoeba (kur-eng)": 46.94, - "Tatoeba (kzj-eng)": 6.24, - "Tatoeba (lat-eng)": 19.47, - "Tatoeba (lfn-eng)": 47.02, - "Tatoeba (lit-eng)": 93.16, - "Tatoeba (lvs-eng)": 97.87, - "Tatoeba (mal-eng)": 32.2, - "Tatoeba (mar-eng)": 92.38, - "Tatoeba (max-eng)": 45.25, - "Tatoeba (mhr-eng)": 6.89, - "Tatoeba (mkd-eng)": 91.0, - "Tatoeba (mon-eng)": 95.04, - "Tatoeba (nds-eng)": 32.16, - "Tatoeba (nld-eng)": 94.58, - "Tatoeba (nno-eng)": 76.34, - "Tatoeba (nob-eng)": 97.73, - "Tatoeba (nov-eng)": 47.99, - "Tatoeba (oci-eng)": 38.57, - "Tatoeba (orv-eng)": 15.1, - "Tatoeba (pam-eng)": 5.41, - "Tatoeba (pes-eng)": 92.59, - "Tatoeba (pms-eng)": 30.7, - "Tatoeba (pol-eng)": 94.28, - "Tatoeba (por-eng)": 92.13, - "Tatoeba (ron-eng)": 95.3, - "Tatoeba (rus-eng)": 91.87, - "Tatoeba (slk-eng)": 95.15, - "Tatoeba (slv-eng)": 96.92, - "Tatoeba (spa-eng)": 95.42, - "Tatoeba (sqi-eng)": 98.17, - "Tatoeba (srp-eng)": 92.24, - "Tatoeba (swe-eng)": 94.42, - "Tatoeba (swg-eng)": 26.31, - "Tatoeba (swh-eng)": 14.48, - "Tatoeba (tam-eng)": 24.64, - "Tatoeba (tat-eng)": 10.25, - "Tatoeba (tel-eng)": 36.4, - "Tatoeba (tgl-eng)": 13.09, - "Tatoeba (tha-eng)": 96.72, - "Tatoeba (tuk-eng)": 15.16, - "Tatoeba (tur-eng)": 95.08, - "Tatoeba (tzl-eng)": 25.46, - "Tatoeba (uig-eng)": 24.39, - "Tatoeba (ukr-eng)": 92.82, - "Tatoeba (urd-eng)": 94.57, - "Tatoeba (uzb-eng)": 17.14, - "Tatoeba (vie-eng)": 95.12, - "Tatoeba (war-eng)": 7.25, - "Tatoeba (wuu-eng)": 76.0, - "Tatoeba (xho-eng)": 4.52, - "Tatoeba (yid-eng)": 14.38, - "Tatoeba (yue-eng)": 71.45, - "Tatoeba (zsm-eng)": 95.31 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "AllegroReviews (pol-Latn)": 30.85, - "AllegroReviews": 30.88, - "AmazonCounterfactualClassification (en-ext)": 69.99, - "AmazonCounterfactualClassification (en)": 71.57, - "AmazonCounterfactualClassification (deu-Latn)": 68.36, - "AmazonCounterfactualClassification (jpn-Jpan)": 63.37, - "AmazonCounterfactualClassification (de)": 68.35, - "AmazonCounterfactualClassification (ja)": 63.45, - "AmazonPolarityClassification": 69.21, - "AmazonReviewsClassification (en)": 35.11, - "AmazonReviewsClassification (deu-Latn)": 35.91, - "AmazonReviewsClassification (spa-Latn)": 37.49, - "AmazonReviewsClassification (fra-Latn)": 35.29, - "AmazonReviewsClassification (jpn-Jpan)": 33.21, - "AmazonReviewsClassification (cmn-Hans)": 35.24, - "AmazonReviewsClassification (de)": 35.91, - "AmazonReviewsClassification (es)": 37.49, - "AmazonReviewsClassification (fr)": 35.3, - "AmazonReviewsClassification (ja)": 33.24, - "AmazonReviewsClassification (zh)": 35.26, - "AngryTweetsClassification (dan-Latn)": 50.9, - "Banking77Classification": 79.77, - "CBD (pol-Latn)": 57.71, - "CBD": 57.68, - "DanishPoliticalCommentsClassification (dan-Latn)": 37.58, - "EmotionClassification": 42.37, - "GeoreviewClassification (rus-Cyrl)": 38.24, - "HeadlineClassification (rus-Cyrl)": 68.3, - "IFlyTek (cmn-Hans)": 39.88, - "ImdbClassification": 60.46, - "InappropriatenessClassification (rus-Cyrl)": 58.18, - "JDReview (cmn-Hans)": 70.26, - "KinopoiskClassification (rus-Cyrl)": 41.45, - "LccSentimentClassification (dan-Latn)": 54.53, - "MTOPDomainClassification (en)": 87.06, - "MTOPDomainClassification (deu-Latn)": 79.21, - "MTOPDomainClassification (spa-Latn)": 83.06, - "MTOPDomainClassification (fra-Latn)": 78.64, - "MTOPDomainClassification (hin-Deva)": 81.36, - "MTOPDomainClassification (tha-Thai)": 79.97, - "MTOPDomainClassification (de)": 79.2, - "MTOPDomainClassification (es)": 83.04, - "MTOPDomainClassification (fr)": 78.63, - "MTOPDomainClassification (hi)": 81.36, - "MTOPDomainClassification (th)": 79.99, - "MTOPIntentClassification (en)": 65.52, - "MTOPIntentClassification (deu-Latn)": 54.21, - "MTOPIntentClassification (spa-Latn)": 60.3, - "MTOPIntentClassification (fra-Latn)": 54.01, - "MTOPIntentClassification (hin-Deva)": 59.92, - "MTOPIntentClassification (tha-Thai)": 61.97, - "MTOPIntentClassification (de)": 54.23, - "MTOPIntentClassification (es)": 60.28, - "MTOPIntentClassification (fr)": 54.05, - "MTOPIntentClassification (hi)": 59.9, - "MTOPIntentClassification (th)": 61.96, - "MasakhaNEWSClassification (amh-Ethi)": 64.28, - "MasakhaNEWSClassification (eng)": 74.7, - "MasakhaNEWSClassification (fra-Latn)": 71.68, - "MasakhaNEWSClassification (hau-Latn)": 47.96, - "MasakhaNEWSClassification (ibo-Latn)": 42.46, - "MasakhaNEWSClassification (lin-Latn)": 59.26, - "MasakhaNEWSClassification (lug-Latn)": 42.29, - "MasakhaNEWSClassification (orm-Ethi)": 34.98, - "MasakhaNEWSClassification (pcm-Latn)": 89.54, - "MasakhaNEWSClassification (run-Latn)": 47.2, - "MasakhaNEWSClassification (sna-Latn)": 57.56, - "MasakhaNEWSClassification (som-Latn)": 34.8, - "MasakhaNEWSClassification (swa-Latn)": 46.05, - "MasakhaNEWSClassification (tir-Ethi)": 27.94, - "MasakhaNEWSClassification (xho-Latn)": 44.81, - "MasakhaNEWSClassification (yor-Latn)": 52.92, - "MasakhaNEWSClassification (fra)": 76.09, - "MassiveIntentClassification (deu-Latn)": 50.71, - "MassiveIntentClassification (heb-Hebr)": 52.55, - "MassiveIntentClassification (tha-Thai)": 58.92, - "MassiveIntentClassification (swa-Latn)": 29.56, - "MassiveIntentClassification (cmo-Hant)": 58.74, - "MassiveIntentClassification (vie-Latn)": 56.62, - "MassiveIntentClassification (kor-Kore)": 50.36, - "MassiveIntentClassification (en)": 66.89, - "MassiveIntentClassification (hye-Armn)": 51.6, - "MassiveIntentClassification (tgl-Latn)": 33.67, - "MassiveIntentClassification (nld-Latn)": 59.52, - "MassiveIntentClassification (urd-Arab)": 52.79, - "MassiveIntentClassification (fin-Latn)": 57.56, - "MassiveIntentClassification (amh-Ethi)": 36.77, - "MassiveIntentClassification (kat-Geor)": 43.03, - "MassiveIntentClassification (ita-Latn)": 59.66, - "MassiveIntentClassification (kan-Knda)": 41.0, - "MassiveIntentClassification (cym-Latn)": 26.13, - "MassiveIntentClassification (lav-Latn)": 54.72, - "MassiveIntentClassification (sqi-Latn)": 56.6, - "MassiveIntentClassification (msa-Latn)": 54.81, - "MassiveIntentClassification (ben-Beng)": 35.38, - "MassiveIntentClassification (hin-Deva)": 58.37, - "MassiveIntentClassification (hun-Latn)": 60.44, - "MassiveIntentClassification (nob-Latn)": 55.52, - "MassiveIntentClassification (mon-Cyrl)": 51.77, - "MassiveIntentClassification (slv-Latn)": 57.35, - "MassiveIntentClassification (tur-Latn)": 59.91, - "MassiveIntentClassification (fra-Latn)": 60.24, - "MassiveIntentClassification (ind-Latn)": 59.9, - "MassiveIntentClassification (mya-Mymr)": 52.03, - "MassiveIntentClassification (cmo-Hans)": 62.0, - "MassiveIntentClassification (pol-Latn)": 59.48, - "MassiveIntentClassification (spa-Latn)": 59.7, - "MassiveIntentClassification (jpn-Jpan)": 60.9, - "MassiveIntentClassification (tam-Taml)": 36.82, - "MassiveIntentClassification (ell-Grek)": 58.7, - "MassiveIntentClassification (ara-Arab)": 45.15, - "MassiveIntentClassification (por-Latn)": 61.29, - "MassiveIntentClassification (jav-Latn)": 32.37, - "MassiveIntentClassification (afr-Latn)": 45.87, - "MassiveIntentClassification (rus-Cyrl)": 59.06, - "MassiveIntentClassification (tel-Telu)": 40.77, - "MassiveIntentClassification (aze-Latn)": 47.43, - "MassiveIntentClassification (isl-Latn)": 30.87, - "MassiveIntentClassification (mal-Mlym)": 42.44, - "MassiveIntentClassification (ron-Latn)": 58.44, - "MassiveIntentClassification (khm-Khmr)": 40.04, - "MassiveIntentClassification (fas-Arab)": 61.03, - "MassiveIntentClassification (dan-Latn)": 57.75, - "MassiveIntentClassification (swe-Latn)": 59.43, - "MassiveIntentClassification (pl)": 59.43, - "MassiveIntentClassification (fr)": 57.52, - "MassiveScenarioClassification (cmo-Hans)": 67.45, - "MassiveScenarioClassification (ben-Beng)": 41.17, - "MassiveScenarioClassification (swa-Latn)": 34.86, - "MassiveScenarioClassification (spa-Latn)": 65.07, - "MassiveScenarioClassification (tur-Latn)": 66.53, - "MassiveScenarioClassification (ara-Arab)": 51.71, - "MassiveScenarioClassification (fin-Latn)": 63.74, - "MassiveScenarioClassification (aze-Latn)": 52.09, - "MassiveScenarioClassification (mon-Cyrl)": 57.07, - "MassiveScenarioClassification (khm-Khmr)": 46.95, - "MassiveScenarioClassification (ron-Latn)": 64.2, - "MassiveScenarioClassification (heb-Hebr)": 59.22, - "MassiveScenarioClassification (en)": 71.54, - "MassiveScenarioClassification (amh-Ethi)": 41.89, - "MassiveScenarioClassification (cmo-Hant)": 65.72, - "MassiveScenarioClassification (kor-Kore)": 55.71, - "MassiveScenarioClassification (slv-Latn)": 64.01, - "MassiveScenarioClassification (kan-Knda)": 45.72, - "MassiveScenarioClassification (dan-Latn)": 66.87, - "MassiveScenarioClassification (tgl-Latn)": 37.39, - "MassiveScenarioClassification (deu-Latn)": 57.4, - "MassiveScenarioClassification (isl-Latn)": 37.55, - "MassiveScenarioClassification (sqi-Latn)": 64.34, - "MassiveScenarioClassification (ind-Latn)": 66.17, - "MassiveScenarioClassification (cym-Latn)": 31.71, - "MassiveScenarioClassification (hun-Latn)": 66.57, - "MassiveScenarioClassification (nld-Latn)": 65.53, - "MassiveScenarioClassification (jav-Latn)": 38.62, - "MassiveScenarioClassification (hye-Armn)": 56.11, - "MassiveScenarioClassification (tha-Thai)": 67.05, - "MassiveScenarioClassification (afr-Latn)": 53.63, - "MassiveScenarioClassification (mya-Mymr)": 59.09, - "MassiveScenarioClassification (fas-Arab)": 65.89, - "MassiveScenarioClassification (jpn-Jpan)": 66.49, - "MassiveScenarioClassification (kat-Geor)": 50.66, - "MassiveScenarioClassification (vie-Latn)": 60.73, - "MassiveScenarioClassification (tam-Taml)": 42.63, - "MassiveScenarioClassification (ell-Grek)": 66.14, - "MassiveScenarioClassification (swe-Latn)": 67.14, - "MassiveScenarioClassification (lav-Latn)": 59.82, - "MassiveScenarioClassification (rus-Cyrl)": 65.25, - "MassiveScenarioClassification (ita-Latn)": 65.01, - "MassiveScenarioClassification (msa-Latn)": 61.73, - "MassiveScenarioClassification (por-Latn)": 65.83, - "MassiveScenarioClassification (urd-Arab)": 60.41, - "MassiveScenarioClassification (pol-Latn)": 65.04, - "MassiveScenarioClassification (nob-Latn)": 64.25, - "MassiveScenarioClassification (hin-Deva)": 65.23, - "MassiveScenarioClassification (tel-Telu)": 46.49, - "MassiveScenarioClassification (fra-Latn)": 66.09, - "MassiveScenarioClassification (mal-Mlym)": 47.73, - "MassiveScenarioClassification (pl)": 65.04, - "MassiveScenarioClassification (fr)": 64.52, - "MultilingualSentiment (cmn-Hans)": 61.9, - "NoRecClassification (nob-Latn)": 46.7, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 42.52, - "OnlineShopping (cmn-Hans)": 84.89, - "PAC (pol-Latn)": 65.75, - "PAC": 65.76, - "PolEmo2.0-IN (pol-Latn)": 57.76, - "PolEmo2.0-IN": 57.76, - "PolEmo2.0-OUT (pol-Latn)": 28.66, - "PolEmo2.0-OUT": 28.7, - "RuReviewsClassification (rus-Cyrl)": 58.88, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 53.19, - "RuSciBenchOECDClassification (rus-Cyrl)": 41.41, - "TNews (cmn-Hans)": 39.19, - "ToxicConversationsClassification": 66.07, - "TweetSentimentExtractionClassification": 56.12, - "Waimai (cmn-Hans)": 82.27 + "Model": "bert-base-25lang-cased", + "AmazonReviewsClassification": 29.39, + "MTOPDomainClassification": 63.63, + "MTOPIntentClassification": 37.86, + "MasakhaNEWSClassification": 63.91, + "MassiveIntentClassification": 37.3, + "MassiveScenarioClassification": 44.47 } ] }, "Clustering": { "v_measure": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "8TagsClustering": 23.24, - "AlloProfClusteringP2P": 56.06, - "AlloProfClusteringS2S": 42.16, - "ArxivClusteringP2P": 38.33, - "ArxivClusteringS2S": 31.55, - "BiorxivClusteringP2P": 33.49, - "BiorxivClusteringS2S": 29.44, - "BlurbsClusteringP2P": 32.46, - "BlurbsClusteringS2S": 14.33, - "GeoreviewClusteringP2P (rus-Cyrl)": 53.37, - "HALClusteringS2S": 23.21, - "MLSUMClusteringP2P (rus-Cyrl)": 37.0, - "MLSUMClusteringP2P": 39.97, - "MLSUMClusteringS2S (rus-Cyrl)": 38.16, - "MLSUMClusteringS2S": 36.55, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 40.36, - "MasakhaNEWSClusteringP2P (eng)": 49.96, - "MasakhaNEWSClusteringP2P (fra-Latn)": 40.85, - "MasakhaNEWSClusteringP2P (hau-Latn)": 19.39, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 33.81, - "MasakhaNEWSClusteringP2P (lin-Latn)": 51.98, - "MasakhaNEWSClusteringP2P (lug-Latn)": 41.88, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 22.23, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.64, - "MasakhaNEWSClusteringP2P (run-Latn)": 48.03, - "MasakhaNEWSClusteringP2P (sna-Latn)": 44.62, - "MasakhaNEWSClusteringP2P (som-Latn)": 27.54, - "MasakhaNEWSClusteringP2P (swa-Latn)": 22.69, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 42.02, - "MasakhaNEWSClusteringP2P (xho-Latn)": 27.68, - "MasakhaNEWSClusteringP2P (yor-Latn)": 27.29, - "MasakhaNEWSClusteringP2P (fra)": 36.58, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 42.28, - "MasakhaNEWSClusteringS2S (eng)": 25.74, - "MasakhaNEWSClusteringS2S (fra-Latn)": 36.5, - "MasakhaNEWSClusteringS2S (hau-Latn)": 9.2, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 33.37, - "MasakhaNEWSClusteringS2S (lin-Latn)": 47.76, - "MasakhaNEWSClusteringS2S (lug-Latn)": 45.15, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 22.08, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 58.42, - "MasakhaNEWSClusteringS2S (run-Latn)": 47.41, - "MasakhaNEWSClusteringS2S (sna-Latn)": 43.0, - "MasakhaNEWSClusteringS2S (som-Latn)": 26.22, - "MasakhaNEWSClusteringS2S (swa-Latn)": 13.53, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.4, - "MasakhaNEWSClusteringS2S (xho-Latn)": 21.03, - "MasakhaNEWSClusteringS2S (yor-Latn)": 27.04, - "MasakhaNEWSClusteringS2S (fra)": 33.9, - "MedrxivClusteringP2P": 31.52, - "MedrxivClusteringS2S": 30.87, - "RedditClustering": 42.02, - "RedditClusteringP2P": 50.73, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 48.22, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.68, - "StackExchangeClustering": 49.6, - "StackExchangeClusteringP2P": 31.69, - "TenKGnadClusteringP2P": 36.13, - "TenKGnadClusteringS2S": 22.26, - "TwentyNewsgroupsClustering": 39.28 + "Model": "bert-base-25lang-cased", + "AlloProfClusteringP2P": 53.49, + "AlloProfClusteringS2S": 43.1, + "HALClusteringS2S": 19.78, + "MLSUMClusteringP2P": 40.73, + "MLSUMClusteringS2S": 31.94, + "MasakhaNEWSClusteringP2P": 24.23, + "MasakhaNEWSClusteringS2S": 24.46 } ] }, "PairClassification": { "max_ap": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "CDSC-E (pol-Latn)": 72.22, - "OpusparcusPC (deu-Latn)": 96.63, - "OpusparcusPC (en)": 98.59, - "OpusparcusPC (fin-Latn)": 93.2, - "OpusparcusPC (fra-Latn)": 92.01, - "OpusparcusPC (rus-Cyrl)": 88.25, - "OpusparcusPC (swe-Latn)": 93.99, - "PSC (pol-Latn)": 97.14, - "PawsXPairClassification (deu-Latn)": 53.26, - "PawsXPairClassification (en)": 55.94, - "PawsXPairClassification (spa-Latn)": 54.61, - "PawsXPairClassification (fra-Latn)": 56.94, - "PawsXPairClassification (jpn-Hira)": 48.66, - "PawsXPairClassification (kor-Hang)": 49.69, - "PawsXPairClassification (cmn-Hans)": 54.3, - "SICK-E-PL (pol-Latn)": 71.94, - "SprintDuplicateQuestions": 89.46, - "TERRa (rus-Cyrl)": 58.56, - "TwitterSemEval2015": 62.06, - "TwitterURLCorpus": 83.83 - }, - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "CDSC-E (pol-Latn)": 72.32, - "CDSC-E": 72.33, - "OpusparcusPC (deu-Latn)": 96.63, - "OpusparcusPC (en)": 98.59, - "OpusparcusPC (fin-Latn)": 93.2, - "OpusparcusPC (fra-Latn)": 92.01, - "OpusparcusPC (rus-Cyrl)": 88.25, - "OpusparcusPC (swe-Latn)": 93.99, - "OpusparcusPC (fr)": 92.01, - "PPC": 92.36, - "PSC (pol-Latn)": 97.14, - "PSC": 97.14, - "PawsXPairClassification (deu-Latn)": 53.34, - "PawsXPairClassification (en)": 55.94, - "PawsXPairClassification (spa-Latn)": 54.61, - "PawsXPairClassification (fra-Latn)": 57.13, - "PawsXPairClassification (jpn-Hira)": 48.84, - "PawsXPairClassification (kor-Hang)": 49.86, - "PawsXPairClassification (cmn-Hans)": 54.59, - "PawsXPairClassification (fr)": 57.03, - "SICK-E-PL (pol-Latn)": 71.94, - "SICK-E-PL": 71.94, - "SprintDuplicateQuestions": 91.15, - "TERRa (rus-Cyrl)": 58.56, - "TwitterSemEval2015": 65.06, - "TwitterURLCorpus": 83.83 + "Model": "bert-base-25lang-cased", + "OpusparcusPC": 86.79, + "PawsXPairClassification": 53.39 }, { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "CDSC-E": 72.22, - "OpusparcusPC (fr)": 92.01, - "PPC": 91.8, - "PSC": 97.14, - "PawsXPairClassification (fr)": 56.94, - "SICK-E-PL": 71.94, - "SprintDuplicateQuestions": 89.46, - "TwitterSemEval2015": 62.06, - "TwitterURLCorpus": 83.83 + "Model": "bert-base-25lang-cased", + "OpusparcusPC": 87.78, + "PawsXPairClassification": 53.4 } ] }, "Reranking": { "map": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "AlloprofReranking (fra-Latn)": 62.42, - "AlloprofReranking": 49.01, - "AskUbuntuDupQuestions": 60.49, - "MMarcoReranking (cmn-Hans)": 16.14, - "MindSmallReranking": 30.37, - "RuBQReranking (rus-Cyrl)": 52.8, - "SciDocsRR": 77.78, - "StackOverflowDupQuestions": 45.85, - "SyntecReranking (fra-Latn)": 72.5, - "SyntecReranking": 75.03, - "T2Reranking (cmn-Hans)": 65.28 + "Model": "bert-base-25lang-cased", + "AlloprofReranking": 36.25, + "SyntecReranking": 53.25 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "AILACasedocs": 13.66, - "AILAStatutes": 20.52, - "ARCChallenge": 6.19, - "AlloprofRetrieval (fra-Latn)": 26.63, - "AlloprofRetrieval": 26.63, - "AlphaNLI": 20.89, - "AppsRetrieval (eng-Latn_python-Code)": 2.0, - "ArguAna": 44.88, - "ArguAna-PL (pol-Latn)": 37.86, - "ArguAna-PL": 37.83, - "BSARDRetrieval (fra-Latn)": 9.6, + "Model": "bert-base-25lang-cased", + "AlloprofRetrieval": 1.6, "BSARDRetrieval": 0.0, - "CQADupstackRetrieval": 30.7, - "ClimateFEVER": 18.49, - "CmedqaRetrieval (cmn-Hans)": 10.78, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 12.53, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 32.53, - "CodeSearchNetCCRetrieval (python-Code)": 44.49, - "CodeSearchNetCCRetrieval (javascript-Code)": 43.56, - "CodeSearchNetCCRetrieval (go-Code)": 28.54, - "CodeSearchNetCCRetrieval (ruby-Code)": 43.84, - "CodeSearchNetCCRetrieval (java-Code)": 26.23, - "CodeSearchNetCCRetrieval (php-Code)": 16.3, - "CodeSearchNetRetrieval (python-Code)": 58.15, - "CodeSearchNetRetrieval (javascript-Code)": 46.41, - "CodeSearchNetRetrieval (go-Code)": 61.49, - "CodeSearchNetRetrieval (ruby-Code)": 56.44, - "CodeSearchNetRetrieval (java-Code)": 32.49, - "CodeSearchNetRetrieval (php-Code)": 45.01, - "CodeTransOceanContest (python-Code_c++-Code)": 25.67, - "CodeTransOceanDL": 11.65, - "CosQA (eng-Latn_python-Code)": 14.24, - "CovidRetrieval (cmn-Hans)": 30.11, - "DBPedia": 22.63, - "DBPedia-PL": 18.0, - "DuRetrieval (cmn-Hans)": 34.72, - "EcomRetrieval (cmn-Hans)": 13.32, - "FEVER": 52.66, - "FiQA-PL (pol-Latn)": 12.49, - "FiQA-PL": 12.49, - "FiQA2018": 20.33, - "GerDaLIRSmall (deu-Latn)": 2.62, - "HellaSwag": 16.98, - "HotpotQA": 30.01, - "HotpotQA-PL": 22.76, - "LEMBNarrativeQARetrieval": 13.82, - "LEMBQMSumRetrieval": 11.02, - "LEMBSummScreenFDRetrieval": 38.12, - "LEMBWikimQARetrieval": 40.84, - "LeCaRDv2 (zho-Hans)": 32.03, - "LegalBenchConsumerContractsQA": 49.81, - "LegalBenchCorporateLobbying": 88.51, - "LegalQuAD (deu-Latn)": 13.31, - "LegalSummarization": 54.97, - "MMarcoRetrieval (cmn-Hans)": 46.62, - "MSMARCO": 23.72, - "MSMARCO-PL": 10.39, - "MedicalRetrieval (cmn-Hans)": 15.46, - "MintakaRetrieval (ara-Arab)": 12.61, - "MintakaRetrieval (deu-Latn)": 21.77, - "MintakaRetrieval (spa-Latn)": 21.59, - "MintakaRetrieval (fra-Latn)": 21.53, - "MintakaRetrieval (hin-Deva)": 16.76, - "MintakaRetrieval (ita-Latn)": 22.23, - "MintakaRetrieval (jpn-Hira)": 14.33, - "MintakaRetrieval (por-Latn)": 22.52, - "MintakaRetrieval (fr)": 21.53, - "NFCorpus": 23.45, - "NFCorpus-PL (pol-Latn)": 17.17, - "NFCorpus-PL": 17.16, - "NQ": 29.8, - "NQ-PL": 12.56, - "PIQA": 15.79, - "Quail": 2.96, - "Quora-PL": 77.18, - "QuoraRetrieval": 86.55, - "RARbCode": 8.48, - "RARbMath": 30.02, - "RiaNewsRetrieval (rus-Cyrl)": 44.82, - "RuBQRetrieval (rus-Cyrl)": 29.7, - "SCIDOCS": 0.03, - "SCIDOCS-PL (pol-Latn)": 10.26, - "SCIDOCS-PL": 10.26, - "SIQA": 0.88, - "SciFact": 48.37, - "SciFact-PL (pol-Latn)": 40.24, - "SciFact-PL": 40.24, - "SpartQA": 4.94, - "StackOverflowQA": 37.94, - "SyntecRetrieval (fra-Latn)": 65.54, - "SyntecRetrieval": 65.54, - "SyntheticText2SQL (eng-Latn_sql-Code)": 30.57, - "T2Retrieval (cmn-Hans)": 30.31, - "TRECCOVID": 39.12, - "TRECCOVID-PL (pol-Latn)": 34.23, - "TRECCOVID-PL": 34.38, - "TempReasonL1": 1.43, - "TempReasonL2Fact": 6.21, - "TempReasonL2Pure": 0.22, - "TempReasonL3Fact": 6.77, - "TempReasonL3Pure": 4.9, - "Touche2020": 16.06, - "VideoRetrieval (cmn-Hans)": 14.71, - "WinoGrande": 46.52, - "XPQARetrieval (ara-Arab_ara-Arab)": 22.97, - "XPQARetrieval (eng-Latn_ara-Arab)": 17.17, - "XPQARetrieval (ara-Arab_eng-Latn)": 25.5, - "XPQARetrieval (deu-Latn_deu-Latn)": 42.62, - "XPQARetrieval (eng-Latn_deu-Latn)": 26.52, - "XPQARetrieval (deu-Latn_eng-Latn)": 48.73, - "XPQARetrieval (spa-Latn_spa-Latn)": 38.24, - "XPQARetrieval (eng-Latn_spa-Latn)": 26.09, - "XPQARetrieval (spa-Latn_eng-Latn)": 41.51, - "XPQARetrieval (fra-Latn_fra-Latn)": 42.51, - "XPQARetrieval (eng-Latn_fra-Latn)": 26.09, - "XPQARetrieval (fra-Latn_eng-Latn)": 43.08, - "XPQARetrieval (hin-Deva_hin-Deva)": 52.09, - "XPQARetrieval (eng-Latn_hin-Deva)": 24.08, - "XPQARetrieval (hin-Deva_eng-Latn)": 49.11, - "XPQARetrieval (ita-Latn_ita-Latn)": 51.63, - "XPQARetrieval (eng-Latn_ita-Latn)": 29.34, - "XPQARetrieval (ita-Latn_eng-Latn)": 46.53, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 51.57, - "XPQARetrieval (eng-Latn_jpn-Hira)": 23.87, - "XPQARetrieval (jpn-Hira_eng-Latn)": 44.93, - "XPQARetrieval (kor-Hang_kor-Hang)": 21.36, - "XPQARetrieval (eng-Latn_kor-Hang)": 21.51, - "XPQARetrieval (kor-Hang_eng-Latn)": 22.59, - "XPQARetrieval (pol-Latn_pol-Latn)": 28.5, - "XPQARetrieval (eng-Latn_pol-Latn)": 17.08, - "XPQARetrieval (pol-Latn_eng-Latn)": 26.54, - "XPQARetrieval (por-Latn_por-Latn)": 32.33, - "XPQARetrieval (eng-Latn_por-Latn)": 19.76, - "XPQARetrieval (por-Latn_eng-Latn)": 34.21, - "XPQARetrieval (tam-Taml_tam-Taml)": 6.37, - "XPQARetrieval (eng-Latn_tam-Taml)": 5.36, - "XPQARetrieval (tam-Taml_eng-Latn)": 9.03, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 44.16, - "XPQARetrieval (eng-Latn_cmn-Hans)": 19.03, - "XPQARetrieval (cmn-Hans_eng-Latn)": 40.08, - "XPQARetrieval (fr)": 42.51 - }, - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "LEMBNeedleRetrieval": 13.5, - "LEMBPasskeyRetrieval": 8.25 + "MintakaRetrieval": 3.55, + "SyntecRetrieval": 18.95, + "XPQARetrieval": 18.46 } ] }, "STS": { "cosine_spearman": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "AFQMC (cmn-Hans)": 14.3, - "ATEC (cmn-Hans)": 18.42, - "BIOSSES": 74.18, - "BQ (cmn-Hans)": 38.53, - "CDSC-R (pol-Latn)": 88.98, - "LCQMC (cmn-Hans)": 63.96, - "PAWSX (cmn-Hans)": 10.13, - "RUParaPhraserSTS (rus-Cyrl)": 61.87, - "RuSTSBenchmarkSTS (rus-Cyrl)": 79.55, - "SICK-R": 79.61, - "SICK-R-PL (pol-Latn)": 68.77, - "SICKFr (fra-Latn)": 75.1, - "STS12": 76.02, - "STS13": 80.7, - "STS14": 78.85, - "STS15": 85.84, - "STS16": 81.05, - "STS17 (spa-Latn)": 85.56, - "STS17 (nld-Latn_eng-Latn)": 81.71, - "STS17 (kor-Hang)": 77.03, - "STS17 (eng-Latn_ara-Arab)": 81.22, - "STS17 (ita-Latn_eng-Latn)": 82.35, - "STS17 (eng-Latn_tur-Latn)": 76.74, - "STS17 (eng-Latn_deu-Latn)": 84.22, - "STS17 (fra-Latn_eng-Latn)": 76.59, - "STS17 (en-en)": 86.87, - "STS17 (ara-Arab)": 79.16, - "STS17 (spa-Latn_eng-Latn)": 84.44, - "STS22 (pol-Latn)": 33.74, - "STS22 (spa-Latn)": 56.56, - "STS22 (en)": 62.07, - "STS22 (fra-Latn)": 70.55, - "STS22 (spa-Latn_eng-Latn)": 67.33, - "STS22 (fra-Latn_pol-Latn)": 50.71, - "STS22 (pol-Latn_eng-Latn)": 69.02, - "STS22 (deu-Latn_fra-Latn)": 51.73, - "STS22 (deu-Latn)": 44.64, - "STS22 (ara-Arab)": 46.2, - "STS22 (deu-Latn_pol-Latn)": 44.22, - "STS22 (cmn-Hans_eng-Latn)": 65.71, - "STS22 (ita-Latn)": 55.22, - "STS22 (tur-Latn)": 53.39, - "STS22 (rus-Cyrl)": 57.08, - "STS22 (cmn-Hans)": 58.75, - "STS22 (spa-Latn_ita-Latn)": 47.67, - "STS22 (deu-Latn_eng-Latn)": 52.65, - "STSB (cmn-Hans)": 78.91, - "STSBenchmark": 84.42, - "STSBenchmarkMultilingualSTS (spa-Latn)": 81.1, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 80.47, - "STSBenchmarkMultilingualSTS (en)": 84.42, - "STSBenchmarkMultilingualSTS (deu-Latn)": 78.87, - "STSBenchmarkMultilingualSTS (nld-Latn)": 79.54, - "STSBenchmarkMultilingualSTS (pol-Latn)": 78.29, - "STSBenchmarkMultilingualSTS (fra-Latn)": 79.9, - "STSBenchmarkMultilingualSTS (por-Latn)": 80.16, - "STSBenchmarkMultilingualSTS (ita-Latn)": 80.39, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 79.32 - }, - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "BIOSSES": 74.18, - "CDSC-R": 88.98, - "SICK-R": 79.61, - "SICK-R-PL": 68.77, - "SICKFr": 75.1, - "STS12": 76.02, - "STS13": 80.7, - "STS14": 78.85, - "STS15": 85.84, - "STS16": 81.05, - "STS17 (ar-ar)": 79.16, - "STS17 (en-ar)": 81.22, - "STS17 (en-de)": 84.22, - "STS17 (en-en)": 86.87, - "STS17 (en-tr)": 76.74, - "STS17 (es-en)": 84.44, - "STS17 (es-es)": 85.56, - "STS17 (fr-en)": 76.59, - "STS17 (it-en)": 82.35, - "STS17 (ko-ko)": 77.03, - "STS17 (nl-en)": 81.71, - "STS22 (pl)": 33.73, - "STS22 (fr)": 70.55, - "STSBenchmark": 84.42, - "STSBenchmarkMultilingualSTS (fr)": 79.9 + "Model": "bert-base-25lang-cased", + "SICKFr": 58.76, + "STS22": 38.77, + "STSBenchmarkMultilingualSTS": 52.25 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "SummEval": 30.67, - "SummEvalFr (fra-Latn)": 29.2 - }, - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "SummEval": 30.67, - "SummEvalFr (fra-Latn)": 29.2 - }, - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "SummEval": 30.67, - "SummEvalFr": 29.2 + "Model": "bert-base-25lang-cased", + "SummEvalFr": 28.84 } ] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "CEDRClassification (rus-Cyrl)": 37.76, - "SensitiveTopicsClassification (rus-Cyrl)": 24.84 - } - ] + "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "paraphrase-multilingual-MiniLM-L12-v2", - "Core17InstructionRetrieval": -0.62, - "News21InstructionRetrieval": -0.37, - "Robust04InstructionRetrieval": -2.94 - } - ] + "p-MRR": [] } }, - "electra-small-nordic": { + "bert-base-multilingual-cased": { "BitextMining": { - "f1": [ - { - "Model": "electra-small-nordic", - "BornholmBitextMining": 1.44 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "electra-small-nordic", - "AngryTweetsClassification": 47.91, - "DKHateClassification": 59.45, - "DanishPoliticalCommentsClassification": 31.89, - "LccSentimentClassification": 47.93, - "MassiveIntentClassification (da)": 26.3, - "MassiveIntentClassification (nb)": 24.6, - "MassiveIntentClassification (sv)": 27.58, - "MassiveScenarioClassification (da)": 28.93, - "MassiveScenarioClassification (nb)": 27.3, - "MassiveScenarioClassification (sv)": 29.93, - "NoRecClassification": 45.44, - "NordicLangClassification": 57.82, - "NorwegianParliament": 53.25, - "ScalaDaClassification": 70.41, - "ScalaNbClassification": 75.28 + "Model": "bert-base-multilingual-cased", + "AmazonReviewsClassification": 29.39, + "MTOPDomainClassification": 63.61, + "MTOPIntentClassification": 37.84, + "MasakhaNEWSClassification": 64.0, + "MassiveIntentClassification": 37.3, + "MassiveScenarioClassification": 44.47 } ] }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "cross-en-de-roberta-sentence-transformer": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, "Clustering": { "v_measure": [ { - "Model": "cross-en-de-roberta-sentence-transformer", - "BlurbsClusteringP2P": 30.82, - "BlurbsClusteringS2S": 12.69, - "TenKGnadClusteringP2P": 23.5, - "TenKGnadClusteringS2S": 10.94 + "Model": "bert-base-multilingual-cased", + "AlloProfClusteringP2P": 51.5, + "AlloProfClusteringS2S": 43.06, + "HALClusteringS2S": 20.81, + "MLSUMClusteringP2P": 40.9, + "MLSUMClusteringS2S": 31.8, + "MasakhaNEWSClusteringP2P": 24.23, + "MasakhaNEWSClusteringS2S": 24.46 } ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "bert-base-multilingual-cased", + "OpusparcusPC": 86.77, + "PawsXPairClassification": 53.39 + }, + { + "Model": "bert-base-multilingual-cased", + "OpusparcusPC": 87.76, + "PawsXPairClassification": 53.41 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "bert-base-multilingual-cased", + "AlloprofReranking": 36.23, + "SyntecReranking": 53.25 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "bert-base-multilingual-cased", + "AlloprofRetrieval": 1.63, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 3.55, + "SyntecRetrieval": 18.95, + "XPQARetrieval": 18.49 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bert-base-multilingual-cased", + "SICKFr": 58.75, + "STS22": 39.05, + "STSBenchmarkMultilingualSTS": 52.25 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bert-base-multilingual-cased", + "SummEvalFr": 28.81 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -5785,150 +6383,87 @@ "p-MRR": [] } }, - "sentence-t5-xxl": { + "bert-base-multilingual-uncased": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "sentence-t5-xxl", - "AmazonCounterfactualClassification (en)": 77.07, - "AmazonPolarityClassification": 92.79, - "AmazonReviewsClassification (en)": 48.93, - "AmazonReviewsClassification (fr)": 46.09, - "Banking77Classification": 82.31, - "EmotionClassification": 48.57, - "ImdbClassification": 90.23, - "MTOPDomainClassification (en)": 92.49, - "MTOPDomainClassification (fr)": 86.2, - "MTOPIntentClassification (en)": 68.33, - "MTOPIntentClassification (fr)": 58.33, - "MasakhaNEWSClassification (fra)": 79.1, - "MassiveIntentClassification (en)": 73.44, - "MassiveIntentClassification (fr)": 65.91, - "MassiveScenarioClassification (en)": 74.82, - "MassiveScenarioClassification (fr)": 68.53, - "ToxicConversationsClassification": 70.04, - "TweetSentimentExtractionClassification": 62.01 + "Model": "bert-base-multilingual-uncased", + "AmazonReviewsClassification": 29.02, + "MTOPDomainClassification": 64.49, + "MTOPIntentClassification": 39.4, + "MasakhaNEWSClassification": 75.69, + "MassiveIntentClassification": 38.01, + "MassiveScenarioClassification": 43.63 } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-t5-xxl", - "AlloProfClusteringP2P": 60.98, - "AlloProfClusteringS2S": 43.5, - "ArxivClusteringP2P": 42.89, - "ArxivClusteringS2S": 33.47, - "BiorxivClusteringP2P": 36.53, - "BiorxivClusteringS2S": 28.66, - "BlurbsClusteringP2P": 39.91, - "BlurbsClusteringS2S": 15.94, - "HALClusteringS2S": 21.4, - "MLSUMClusteringP2P": 42.24, - "MLSUMClusteringS2S": 35.25, - "MasakhaNEWSClusteringP2P (fra)": 61.15, - "MasakhaNEWSClusteringS2S (fra)": 38.24, - "MedrxivClusteringP2P": 32.09, - "MedrxivClusteringS2S": 26.82, - "RedditClustering": 58.99, - "RedditClusteringP2P": 64.46, - "StackExchangeClustering": 70.78, - "StackExchangeClusteringP2P": 35.25, - "TenKGnadClusteringP2P": 43.43, - "TenKGnadClusteringS2S": 19.69, - "TwentyNewsgroupsClustering": 50.93 + "Model": "bert-base-multilingual-uncased", + "AlloProfClusteringP2P": 60.66, + "AlloProfClusteringS2S": 35.05, + "HALClusteringS2S": 20.9, + "MLSUMClusteringP2P": 43.5, + "MLSUMClusteringS2S": 30.99, + "MasakhaNEWSClusteringP2P": 49.71, + "MasakhaNEWSClusteringS2S": 42.23 } ] }, "PairClassification": { "max_ap": [ { - "Model": "sentence-t5-xxl", - "OpusparcusPC (fr)": 93.94, - "PawsXPairClassification (fr)": 63.98, - "SprintDuplicateQuestions": 88.89, - "TwitterSemEval2015": 80.28, - "TwitterURLCorpus": 86.01 + "Model": "bert-base-multilingual-uncased", + "OpusparcusPC": 87.43, + "PawsXPairClassification": 53.22 }, { - "Model": "sentence-t5-xxl", - "OpusparcusPC (fr)": 93.94, - "PawsXPairClassification (fr)": 64.01, - "SprintDuplicateQuestions": 88.91, - "TwitterSemEval2015": 80.28, - "TwitterURLCorpus": 86.01 + "Model": "bert-base-multilingual-uncased", + "OpusparcusPC": 87.53, + "PawsXPairClassification": 53.33 } ] }, "Reranking": { "map": [ { - "Model": "sentence-t5-xxl", - "AlloprofReranking": 68.36, - "AskUbuntuDupQuestions": 66.16, - "MindSmallReranking": 30.6, - "SciDocsRR": 76.09, - "StackOverflowDupQuestions": 52.85, - "SyntecReranking": 85.15 + "Model": "bert-base-multilingual-uncased", + "AlloprofReranking": 38.85, + "SyntecReranking": 66.4 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-t5-xxl", - "AlloprofRetrieval": 45.75, - "ArguAna": 39.85, - "BSARDRetrieval": 3.33, - "CQADupstackRetrieval": 44.65, - "ClimateFEVER": 14.63, - "DBPedia": 39.19, - "FEVER": 51.2, - "FiQA2018": 46.68, - "HotpotQA": 42.14, - "MSMARCO": 27.67, - "MintakaRetrieval (fr)": 34.93, - "NFCorpus": 35.08, - "NQ": 52.87, - "QuoraRetrieval": 85.96, - "SCIDOCS": 17.17, - "SciFact": 55.38, - "SyntecRetrieval": 78.97, - "TRECCOVID": 59.48, - "Touche2020": 21.65, - "XPQARetrieval (fr)": 56.2 + "Model": "bert-base-multilingual-uncased", + "AlloprofRetrieval": 5.51, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 2.87, + "SyntecRetrieval": 34.95, + "XPQARetrieval": 26.12 } ] }, "STS": { "cosine_spearman": [ { - "Model": "sentence-t5-xxl", - "BIOSSES": 80.43, - "SICK-R": 80.47, - "SICKFr": 77.07, - "STS12": 78.85, - "STS13": 88.94, - "STS14": 84.86, - "STS15": 89.32, - "STS16": 84.67, - "STS17 (en-en)": 89.46, - "STS22 (en)": 65.33, - "STS22 (fr)": 76.8, - "STSBenchmark": 84.01, - "STSBenchmarkMultilingualSTS (fr)": 81.24 + "Model": "bert-base-multilingual-uncased", + "SICKFr": 58.26, + "STS22": 56.47, + "STSBenchmarkMultilingualSTS": 54.97 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "sentence-t5-xxl", - "SummEval": 30.08, - "SummEvalFr": 30.39 + "Model": "bert-base-multilingual-uncased", + "SummEvalFr": 30.72 } ] }, @@ -5939,34 +6474,30 @@ "p-MRR": [] } }, - "nb-bert-base": { + "bert-base-swedish-cased": { "BitextMining": { "f1": [ { - "Model": "nb-bert-base", - "BornholmBitextMining": 9.88 + "Model": "bert-base-swedish-cased", + "BornholmBitextMining": 6.6 } ] }, "Classification": { "accuracy": [ { - "Model": "nb-bert-base", - "AngryTweetsClassification": 52.14, - "DKHateClassification": 61.73, - "DanishPoliticalCommentsClassification": 34.84, - "LccSentimentClassification": 51.4, - "MassiveIntentClassification (da)": 56.69, - "MassiveIntentClassification (nb)": 60.67, - "MassiveIntentClassification (sv)": 53.89, - "MassiveScenarioClassification (da)": 61.93, - "MassiveScenarioClassification (nb)": 67.31, - "MassiveScenarioClassification (sv)": 55.37, - "NoRecClassification": 51.32, - "NordicLangClassification": 84.69, - "NorwegianParliament": 57.41, - "ScalaDaClassification": 57.99, - "ScalaNbClassification": 62.25 + "Model": "bert-base-swedish-cased", + "AngryTweetsClassification": 44.58, + "DKHateClassification": 55.53, + "DanishPoliticalCommentsClassification": 28.97, + "LccSentimentClassification": 41.2, + "MassiveIntentClassification": 52.75, + "MassiveScenarioClassification": 56.09, + "NoRecClassification": 43.91, + "NordicLangClassification": 62.45, + "NorwegianParliament": 57.56, + "ScalaDaClassification": 53.53, + "ScalaNbClassification": 53.63 } ] }, @@ -5995,83 +6526,120 @@ "p-MRR": [] } }, - "silver-retriever-base-v1": { + "bert-base-uncased": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "silver-retriever-base-v1", - "AllegroReviews": 33.35, - "CBD": 68.51, - "MassiveIntentClassification (pl)": 66.63, - "MassiveScenarioClassification (pl)": 69.97, - "PAC": 66.26, - "PolEmo2.0-IN": 63.52, - "PolEmo2.0-OUT": 44.7 + "Model": "bert-base-uncased", + "AmazonCounterfactualClassification": 74.25, + "AmazonPolarityClassification": 71.33, + "AmazonReviewsClassification": 33.56, + "Banking77Classification": 63.41, + "EmotionClassification": 35.28, + "ImdbClassification": 65.35, + "MTOPDomainClassification": 82.63, + "MTOPIntentClassification": 68.14, + "MassiveIntentClassification": 59.88, + "MassiveScenarioClassification": 64.28, + "ToxicConversationsClassification": 70.0, + "TweetSentimentExtractionClassification": 51.81 } ] }, "Clustering": { "v_measure": [ { - "Model": "silver-retriever-base-v1", - "8TagsClustering": 31.49 + "Model": "bert-base-uncased", + "ArxivClusteringP2P": 35.19, + "ArxivClusteringS2S": 27.51, + "BiorxivClusteringP2P": 30.12, + "BiorxivClusteringS2S": 24.77, + "MedrxivClusteringP2P": 26.09, + "MedrxivClusteringS2S": 23.6, + "RedditClustering": 27.24, + "RedditClusteringP2P": 43.32, + "StackExchangeClustering": 43.58, + "StackExchangeClusteringP2P": 26.55, + "TwentyNewsgroupsClustering": 23.35 } ] }, "PairClassification": { "max_ap": [ { - "Model": "silver-retriever-base-v1", - "CDSC-E": 67.35, - "PPC": 85.33, - "PSC": 98.46, - "SICK-E-PL": 58.19 + "Model": "bert-base-uncased", + "SprintDuplicateQuestions": 36.81, + "TwitterSemEval2015": 55.9, + "TwitterURLCorpus": 76.29 }, { - "Model": "silver-retriever-base-v1", - "CDSC-E": 67.56, - "PPC": 85.98, - "PSC": 98.6, - "SICK-E-PL": 58.54 + "Model": "bert-base-uncased", + "SprintDuplicateQuestions": 36.81, + "TwitterSemEval2015": 55.9, + "TwitterURLCorpus": 76.29 } ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "bert-base-uncased", + "AskUbuntuDupQuestions": 45.84, + "MindSmallReranking": 28.37, + "SciDocsRR": 64.94, + "StackOverflowDupQuestions": 34.62 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "silver-retriever-base-v1", - "ArguAna-PL": 44.12, - "DBPedia-PL": 26.32, - "FiQA-PL": 24.95, - "HotpotQA-PL": 45.13, - "MSMARCO-PL": 25.47, - "NFCorpus-PL": 28.55, - "NQ-PL": 37.9, - "Quora-PL": 77.98, - "SCIDOCS-PL": 10.9, - "SciFact-PL": 54.44, - "TRECCOVID-PL": 46.98 + "Model": "bert-base-uncased", + "ArguAna": 28.29, + "CQADupstackRetrieval": 5.51, + "ClimateFEVER": 5.41, + "DBPedia": 4.13, + "FEVER": 3.3, + "FiQA2018": 2.19, + "HotpotQA": 8.26, + "MSMARCO": 1.91, + "NFCorpus": 4.3, + "NQ": 2.62, + "QuoraRetrieval": 61.03, + "SCIDOCS": 2.82, + "SciFact": 13.34, + "TRECCOVID": 14.74, + "Touche2020": 0.97 } ] }, "STS": { "cosine_spearman": [ { - "Model": "silver-retriever-base-v1", - "CDSC-R": 89.09, - "SICK-R-PL": 67.26, - "STS22 (pl)": 38.69 + "Model": "bert-base-uncased", + "BIOSSES": 54.7, + "SICK-R": 58.65, + "STS12": 30.87, + "STS13": 59.89, + "STS14": 47.73, + "STS15": 60.29, + "STS16": 63.73, + "STS17": 64.1, + "STS22": 56.37, + "STSBenchmark": 47.29 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bert-base-uncased", + "SummEval": 29.82 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -6080,7 +6648,7 @@ "p-MRR": [] } }, - "instructor-base": { + "bge-base-en-v1.5": { "BitextMining": { "f1": [] }, @@ -6088,7 +6656,20 @@ "accuracy": [] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "bge-base-en-v1.5", + "BiorxivClusteringP2P": 39.44, + "BiorxivClusteringS2S": 36.62, + "MedrxivClusteringP2P": 33.21, + "MedrxivClusteringS2S": 31.68, + "RedditClustering": 56.61, + "RedditClusteringP2P": 62.66, + "StackExchangeClustering": 66.11, + "StackExchangeClusteringP2P": 35.24, + "TwentyNewsgroupsClustering": 50.75 + } + ] }, "PairClassification": { "max_ap": [] @@ -6097,7 +6678,26 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "bge-base-en-v1.5", + "ARCChallenge": 9.66, + "AlphaNLI": 10.99, + "HellaSwag": 26.64, + "PIQA": 25.69, + "Quail": 1.42, + "RARbCode": 46.47, + "RARbMath": 46.86, + "SIQA": 0.94, + "SpartQA": 3.37, + "TempReasonL1": 1.07, + "TempReasonL2Fact": 17.23, + "TempReasonL2Pure": 1.29, + "TempReasonL3Fact": 13.36, + "TempReasonL3Pure": 5.2, + "WinoGrande": 13.76 + } + ] }, "STS": { "cosine_spearman": [] @@ -6109,17 +6709,10 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "instructor-base", - "Core17InstructionRetrieval": -1.09, - "News21InstructionRetrieval": -1.78, - "Robust04InstructionRetrieval": -10.42 - } - ] + "p-MRR": [] } }, - "text-search-curie-001": { + "bge-base-en-v1.5-instruct": { "BitextMining": { "f1": [] }, @@ -6138,18 +6731,22 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "text-search-curie-001", - "ArguAna": 46.98, - "ClimateFEVER": 19.4, - "FEVER": 75.6, - "FiQA2018": 45.21, - "HotpotQA": 64.8, - "NFCorpus": 38.01, - "QuoraRetrieval": 67.7, - "SCIDOCS": 17.74, - "SciFact": 74.35, - "TRECCOVID": 56.14, - "Touche2020": 30.9 + "Model": "bge-base-en-v1.5-instruct", + "ARCChallenge": 8.85, + "AlphaNLI": 4.13, + "HellaSwag": 24.03, + "PIQA": 23.03, + "Quail": 1.25, + "RARbCode": 46.32, + "RARbMath": 45.62, + "SIQA": 0.24, + "SpartQA": 2.67, + "TempReasonL1": 0.8, + "TempReasonL2Fact": 16.56, + "TempReasonL2Pure": 1.33, + "TempReasonL3Fact": 12.68, + "TempReasonL3Pure": 5.08, + "WinoGrande": 10.27 } ] }, @@ -6166,89 +6763,89 @@ "p-MRR": [] } }, - "m3e-large": { + "bge-base-zh-v1.5": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "m3e-large", - "AmazonReviewsClassification (zh)": 44.44, - "IFlyTek": 43.96, - "JDReview": 86.92, - "MassiveIntentClassification (zh-CN)": 67.23, - "MassiveScenarioClassification (zh-CN)": 74.88, - "MultilingualSentiment": 72.47, - "OnlineShopping": 89.59, - "TNews": 48.26, - "Waimai": 86.08 + "Model": "bge-base-zh-v1.5", + "AmazonReviewsClassification": 40.15, + "IFlyTek": 48.62, + "JDReview": 83.62, + "MassiveIntentClassification": 67.93, + "MassiveScenarioClassification": 73.98, + "MultilingualSentiment": 70.67, + "OnlineShopping": 91.26, + "TNews": 51.08, + "Waimai": 85.36 } ] }, "Clustering": { "v_measure": [ { - "Model": "m3e-large", - "CLSClusteringP2P": 38.6, - "CLSClusteringS2S": 38.02, - "ThuNewsClusteringP2P": 60.39, - "ThuNewsClusteringS2S": 58.51 + "Model": "bge-base-zh-v1.5", + "CLSClusteringP2P": 39.91, + "CLSClusteringS2S": 37.63, + "ThuNewsClusteringP2P": 58.45, + "ThuNewsClusteringS2S": 54.12 } ] }, "PairClassification": { "max_ap": [ { - "Model": "m3e-large", - "Cmnli": 69.27, - "Ocnli": 59.33 + "Model": "bge-base-zh-v1.5", + "Cmnli": 84.1, + "Ocnli": 75.41 }, { - "Model": "m3e-large", - "Cmnli": 69.27, - "Ocnli": 59.99 + "Model": "bge-base-zh-v1.5", + "Cmnli": 84.11, + "Ocnli": 75.43 } ] }, "Reranking": { "map": [ { - "Model": "m3e-large", - "CMedQAv1": 77.76, - "CMedQAv2": 78.27, - "MMarcoReranking": 16.46, - "T2Reranking": 66.13 + "Model": "bge-base-zh-v1.5", + "CMedQAv1": 80.47, + "CMedQAv2": 84.88, + "MMarcoReranking": 29.74, + "T2Reranking": 66.49 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "m3e-large", - "CmedqaRetrieval": 30.73, - "CovidRetrieval": 61.33, - "DuRetrieval": 74.69, - "EcomRetrieval": 45.18, - "MMarcoRetrieval": 61.06, - "MedicalRetrieval": 48.66, - "T2Retrieval": 72.36, - "VideoRetrieval": 44.02 + "Model": "bge-base-zh-v1.5", + "CmedqaRetrieval": 41.61, + "CovidRetrieval": 74.7, + "DuRetrieval": 85.07, + "EcomRetrieval": 64.25, + "MMarcoRetrieval": 77.69, + "MedicalRetrieval": 56.51, + "T2Retrieval": 83.71, + "VideoRetrieval": 72.35 } ] }, "STS": { "cosine_spearman": [ { - "Model": "m3e-large", - "AFQMC": 36.53, - "ATEC": 41.8, - "BQ": 65.2, - "LCQMC": 74.2, - "PAWSX": 15.95, - "QBQTC": 32.65, - "STS22 (zh)": 62.91, - "STSB": 74.16 + "Model": "bge-base-zh-v1.5", + "AFQMC": 42.4, + "ATEC": 48.17, + "BQ": 61.78, + "LCQMC": 74.45, + "PAWSX": 20.4, + "QBQTC": 36.22, + "STS22": 68.01, + "STSB": 78.31 } ] }, @@ -6262,80 +6859,130 @@ "p-MRR": [] } }, - "st-polish-paraphrase-from-distilroberta": { + "bge-large-en-v1.5": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "st-polish-paraphrase-from-distilroberta", - "AllegroReviews": 34.5, - "CBD": 70.27, - "MassiveIntentClassification (pl)": 64.81, - "MassiveScenarioClassification (pl)": 70.01, - "PAC": 64.6, - "PolEmo2.0-IN": 67.06, - "PolEmo2.0-OUT": 38.58 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "st-polish-paraphrase-from-distilroberta", - "8TagsClustering": 31.68 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "st-polish-paraphrase-from-distilroberta", - "CDSC-E": 75.99, - "PPC": 93.29, - "PSC": 99.1, - "SICK-E-PL": 79.63 - }, + "Model": "bge-large-en-v1.5", + "AILACasedocs": 25.15, + "AILAStatutes": 20.74, + "ARCChallenge": 9.99, + "AlphaNLI": 13.13, + "BrightRetrieval (stackoverflow)": 9.51, + "BrightRetrieval (earth_science)": 24.15, + "BrightRetrieval (aops)": 6.08, + "BrightRetrieval (sustainable_living)": 13.27, + "BrightRetrieval (psychology)": 17.44, + "BrightRetrieval (robotics)": 12.21, + "BrightRetrieval (theoremqa_theorems)": 6.72, + "BrightRetrieval (pony)": 5.64, + "BrightRetrieval (biology)": 11.96, + "BrightRetrieval (theoremqa_questions)": 12.56, + "BrightRetrieval (leetcode)": 26.68, + "BrightRetrieval (economics)": 16.59, + "GerDaLIRSmall": 3.96, + "HellaSwag": 28.5, + "LeCaRDv2": 22.68, + "LegalBenchConsumerContractsQA": 73.52, + "LegalBenchCorporateLobbying": 91.51, + "LegalQuAD": 16.22, + "LegalSummarization": 59.99, + "PIQA": 27.99, + "Quail": 1.83, + "RARbCode": 48.12, + "RARbMath": 57.36, + "SIQA": 1.04, + "SpartQA": 2.99, + "TempReasonL1": 1.46, + "TempReasonL2Fact": 24.25, + "TempReasonL2Pure": 2.35, + "TempReasonL3Fact": 20.64, + "TempReasonL3Pure": 6.67, + "WinoGrande": 19.18 + } + ], + "recall_at_1": [ { - "Model": "st-polish-paraphrase-from-distilroberta", - "CDSC-E": 75.99, - "PPC": 93.31, - "PSC": 99.1, - "SICK-E-PL": 79.63 + "Model": "bge-large-en-v1.5", + "BrightRetrieval (pony)": 0.36, + "BrightRetrieval (psychology)": 11.58, + "BrightRetrieval (stackoverflow)": 13.25, + "BrightRetrieval (robotics)": 10.89, + "BrightRetrieval (earth_science)": 27.73, + "BrightRetrieval (biology)": 16.42, + "BrightRetrieval (economics)": 20.87, + "BrightRetrieval (sustainable_living)": 16.9 } ] }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "bge-large-en-v1.5-instruct": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, "Reranking": { "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "st-polish-paraphrase-from-distilroberta", - "ArguAna-PL": 49.42, - "DBPedia-PL": 19.82, - "FiQA-PL": 19.58, - "HotpotQA-PL": 23.47, - "MSMARCO-PL": 16.51, - "NFCorpus-PL": 22.49, - "NQ-PL": 19.83, - "Quora-PL": 81.17, - "SCIDOCS-PL": 12.15, - "SciFact-PL": 49.49, - "TRECCOVID-PL": 38.97 + "Model": "bge-large-en-v1.5-instruct", + "ARCChallenge": 8.86, + "AlphaNLI": 0.86, + "HellaSwag": 26.24, + "PIQA": 23.26, + "Quail": 2.72, + "RARbCode": 45.25, + "RARbMath": 49.82, + "SIQA": 0.59, + "SpartQA": 2.34, + "TempReasonL1": 1.17, + "TempReasonL2Fact": 21.19, + "TempReasonL2Pure": 2.1, + "TempReasonL3Fact": 17.59, + "TempReasonL3Pure": 5.99, + "WinoGrande": 10.31 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "st-polish-paraphrase-from-distilroberta", - "CDSC-R": 89.62, - "SICK-R-PL": 76.37, - "STS22 (pl)": 40.36 - } - ] + "cosine_spearman": [] }, "Summarization": { "cosine_spearman": [] @@ -6347,89 +6994,94 @@ "p-MRR": [] } }, - "Cohere-embed-multilingual-v3.0": { + "bge-large-zh-noinstruct": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AmazonReviewsClassification (fr)": 41.89, - "MTOPDomainClassification (fr)": 86.23, - "MTOPIntentClassification (fr)": 61.07, - "MasakhaNEWSClassification (fra)": 83.06, - "MassiveIntentClassification (fr)": 62.94, - "MassiveScenarioClassification (fr)": 67.29 + "Model": "bge-large-zh-noinstruct", + "AmazonReviewsClassification": 41.94, + "IFlyTek": 45.32, + "JDReview": 85.38, + "MassiveIntentClassification": 66.96, + "MassiveScenarioClassification": 73.39, + "MultilingualSentiment": 73.7, + "OnlineShopping": 91.66, + "TNews": 52.05, + "Waimai": 86.83 } ] }, "Clustering": { "v_measure": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AlloProfClusteringP2P": 63.53, - "AlloProfClusteringS2S": 36.18, - "HALClusteringS2S": 19.9, - "MLSUMClusteringP2P": 45.08, - "MLSUMClusteringS2S": 34.75, - "MasakhaNEWSClusteringP2P (fra)": 53.18, - "MasakhaNEWSClusteringS2S (fra)": 32.31 + "Model": "bge-large-zh-noinstruct", + "CLSClusteringP2P": 41.23, + "CLSClusteringS2S": 40.04, + "ThuNewsClusteringP2P": 62.03, + "ThuNewsClusteringS2S": 56.75 } ] }, "PairClassification": { "max_ap": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "OpusparcusPC (fr)": 94.08, - "PawsXPairClassification (fr)": 61.26 + "Model": "bge-large-zh-noinstruct", + "Cmnli": 82.17, + "Ocnli": 71.37 }, { - "Model": "Cohere-embed-multilingual-v3.0", - "OpusparcusPC (fr)": 94.08, - "PawsXPairClassification (fr)": 61.26 + "Model": "bge-large-zh-noinstruct", + "Cmnli": 82.18, + "Ocnli": 71.37 } ] }, "Reranking": { "map": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AlloprofReranking": 51.01, - "SyntecReranking": 85.72 + "Model": "bge-large-zh-noinstruct", + "CMedQAv1": 81.72, + "CMedQAv2": 84.64, + "MMarcoReranking": 27.1, + "T2Reranking": 66.16 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AlloprofRetrieval": 38.36, - "BSARDRetrieval": 0.14, - "MintakaRetrieval (fr)": 25.44, - "SyntecRetrieval": 79.27, - "XPQARetrieval (fr)": 58.87 + "Model": "bge-large-zh-noinstruct", + "CmedqaRetrieval": 41.03, + "CovidRetrieval": 75.07, + "DuRetrieval": 84.68, + "EcomRetrieval": 65.6, + "MMarcoRetrieval": 81.38, + "MedicalRetrieval": 58.28, + "T2Retrieval": 84.39, + "VideoRetrieval": 73.93 } ] }, "STS": { "cosine_spearman": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "SICKFr": 79.23, - "STS22 (fr)": 82.76, - "STSBenchmarkMultilingualSTS (fr)": 81.84 + "Model": "bge-large-zh-noinstruct", + "AFQMC": 43.06, + "ATEC": 48.29, + "BQ": 60.53, + "LCQMC": 74.71, + "PAWSX": 16.64, + "QBQTC": 35.2, + "STS22": 67.19, + "STSB": 78.41 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "Cohere-embed-multilingual-v3.0", - "SummEvalFr": 31.26 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -6438,120 +7090,94 @@ "p-MRR": [] } }, - "nomic-embed-text-v1.5-64": { + "bge-large-zh-v1.5": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "nomic-embed-text-v1.5-64", - "AmazonCounterfactualClassification (en)": 66.85, - "AmazonPolarityClassification": 85.92, - "AmazonReviewsClassification (en)": 41.02, - "Banking77Classification": 80.63, - "EmotionClassification": 40.55, - "ImdbClassification": 76.6, - "MTOPDomainClassification (en)": 86.31, - "MTOPIntentClassification (en)": 62.77, - "MassiveIntentClassification (en)": 64.95, - "MassiveScenarioClassification (en)": 70.38, - "ToxicConversationsClassification": 66.53, - "TweetSentimentExtractionClassification": 55.23 + "Model": "bge-large-zh-v1.5", + "AmazonReviewsClassification": 41.38, + "IFlyTek": 48.74, + "JDReview": 85.14, + "MassiveIntentClassification": 68.84, + "MassiveScenarioClassification": 74.7, + "MultilingualSentiment": 72.97, + "OnlineShopping": 91.43, + "TNews": 52.1, + "Waimai": 86.9 } ] }, "Clustering": { "v_measure": [ { - "Model": "nomic-embed-text-v1.5-64", - "ArxivClusteringP2P": 41.8, - "ArxivClusteringS2S": 32.41, - "BiorxivClusteringP2P": 34.81, - "BiorxivClusteringS2S": 28.59, - "MedrxivClusteringP2P": 32.73, - "MedrxivClusteringS2S": 29.91, - "RedditClustering": 50.31, - "RedditClusteringP2P": 56.57, - "StackExchangeClustering": 57.99, - "StackExchangeClusteringP2P": 33.64, - "TwentyNewsgroupsClustering": 44.61 + "Model": "bge-large-zh-v1.5", + "CLSClusteringP2P": 41.44, + "CLSClusteringS2S": 38.33, + "ThuNewsClusteringP2P": 59.61, + "ThuNewsClusteringS2S": 56.58 } ] }, "PairClassification": { "max_ap": [ { - "Model": "nomic-embed-text-v1.5-64", - "SprintDuplicateQuestions": 90.06, - "TwitterSemEval2015": 71.68, - "TwitterURLCorpus": 85.03 + "Model": "bge-large-zh-v1.5", + "Cmnli": 85.27, + "Ocnli": 77.94 }, { - "Model": "nomic-embed-text-v1.5-64", - "SprintDuplicateQuestions": 90.06, - "TwitterSemEval2015": 71.68, - "TwitterURLCorpus": 85.03 + "Model": "bge-large-zh-v1.5", + "Cmnli": 85.29, + "Ocnli": 77.96 } ] }, "Reranking": { "map": [ { - "Model": "nomic-embed-text-v1.5-64", - "AskUbuntuDupQuestions": 60.79, - "MindSmallReranking": 29.7, - "SciDocsRR": 75.79, - "StackOverflowDupQuestions": 47.42 + "Model": "bge-large-zh-v1.5", + "CMedQAv1": 83.45, + "CMedQAv2": 85.44, + "MMarcoReranking": 28.74, + "T2Reranking": 65.74 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "nomic-embed-text-v1.5-64", - "ArguAna": 37.16, - "CQADupstackRetrieval": 28.72, - "ClimateFEVER": 31.48, - "DBPedia": 28.19, - "FEVER": 70.24, - "FiQA2018": 25.78, - "HotpotQA": 43.07, - "MSMARCO": 35.95, - "NFCorpus": 26.03, - "NQ": 45.54, - "QuoraRetrieval": 85.83, - "SCIDOCS": 12.09, - "SciFact": 52.71, - "TRECCOVID": 67.83, - "Touche2020": 23.13 + "Model": "bge-large-zh-v1.5", + "CmedqaRetrieval": 42.57, + "CovidRetrieval": 73.35, + "DuRetrieval": 86.32, + "EcomRetrieval": 65.33, + "MMarcoRetrieval": 79.23, + "MedicalRetrieval": 59.59, + "T2Retrieval": 83.99, + "VideoRetrieval": 73.32 } ] }, "STS": { "cosine_spearman": [ { - "Model": "nomic-embed-text-v1.5-64", - "BIOSSES": 77.18, - "SICK-R": 78.76, - "STS12": 77.3, - "STS13": 84.18, - "STS14": 79.37, - "STS15": 84.69, - "STS16": 83.36, - "STS17 (en-en)": 85.73, - "STS22 (en)": 63.83, - "STSBenchmark": 83.46 + "Model": "bge-large-zh-v1.5", + "AFQMC": 44.36, + "ATEC": 49.54, + "BQ": 62.94, + "LCQMC": 74.33, + "PAWSX": 33.92, + "QBQTC": 37.29, + "STS22": 68.94, + "STSB": 78.7 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "nomic-embed-text-v1.5-64", - "SummEval": 28.41 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -6560,297 +7186,124 @@ "p-MRR": [] } }, - "distiluse-base-multilingual-cased-v2": { + "bge-m3": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "bge-m3", + "Tatoeba (rus-eng)": 93.42 + } + ] }, "Classification": { "accuracy": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "AllegroReviews": 28.03, - "AmazonCounterfactualClassification (de)": 68.14, - "AmazonCounterfactualClassification (en)": 71.81, - "AmazonCounterfactualClassification (en-ext)": 72.96, - "AmazonCounterfactualClassification (ja)": 65.39, - "AmazonPolarityClassification": 68.0, - "AmazonReviewsClassification (de)": 35.03, - "AmazonReviewsClassification (en)": 35.45, - "AmazonReviewsClassification (es)": 36.24, - "AmazonReviewsClassification (fr)": 35.7, - "AmazonReviewsClassification (ja)": 31.08, - "AmazonReviewsClassification (zh)": 33.89, - "Banking77Classification": 71.48, - "CBD": 60.0, - "EmotionClassification": 40.04, - "ImdbClassification": 61.52, - "MTOPDomainClassification (de)": 86.19, - "MTOPDomainClassification (en)": 91.59, - "MTOPDomainClassification (es)": 87.75, - "MTOPDomainClassification (fr)": 84.61, - "MTOPDomainClassification (hi)": 76.41, - "MTOPDomainClassification (th)": 73.62, - "MTOPIntentClassification (de)": 59.21, - "MTOPIntentClassification (en)": 66.4, - "MTOPIntentClassification (es)": 57.21, - "MTOPIntentClassification (fr)": 53.41, - "MTOPIntentClassification (hi)": 45.54, - "MTOPIntentClassification (th)": 47.73, - "MasakhaNEWSClassification (fra)": 76.87, - "MassiveIntentClassification (af)": 40.02, - "MassiveIntentClassification (am)": 2.35, - "MassiveIntentClassification (ar)": 43.14, - "MassiveIntentClassification (az)": 25.6, - "MassiveIntentClassification (bn)": 4.84, - "MassiveIntentClassification (cy)": 15.43, - "MassiveIntentClassification (da)": 52.33, - "MassiveIntentClassification (de)": 51.57, - "MassiveIntentClassification (el)": 49.65, - "MassiveIntentClassification (en)": 66.71, - "MassiveIntentClassification (es)": 56.57, - "MassiveIntentClassification (fa)": 55.36, - "MassiveIntentClassification (fi)": 45.72, - "MassiveIntentClassification (fr)": 57.02, - "MassiveIntentClassification (he)": 46.74, - "MassiveIntentClassification (hi)": 48.55, - "MassiveIntentClassification (hu)": 50.65, - "MassiveIntentClassification (hy)": 40.79, - "MassiveIntentClassification (id)": 56.0, - "MassiveIntentClassification (is)": 16.08, - "MassiveIntentClassification (it)": 57.65, - "MassiveIntentClassification (ja)": 55.33, - "MassiveIntentClassification (jv)": 28.16, - "MassiveIntentClassification (ka)": 29.41, - "MassiveIntentClassification (km)": 4.79, - "MassiveIntentClassification (kn)": 3.37, - "MassiveIntentClassification (ko)": 49.97, - "MassiveIntentClassification (lv)": 44.31, - "MassiveIntentClassification (ml)": 3.24, - "MassiveIntentClassification (mn)": 40.37, - "MassiveIntentClassification (ms)": 47.97, - "MassiveIntentClassification (my)": 38.48, - "MassiveIntentClassification (nb)": 46.01, - "MassiveIntentClassification (nl)": 58.29, - "MassiveIntentClassification (pl)": 53.1, - "MassiveIntentClassification (pt)": 58.63, - "MassiveIntentClassification (ro)": 50.63, - "MassiveIntentClassification (ru)": 57.96, - "MassiveIntentClassification (sl)": 50.66, - "MassiveIntentClassification (sq)": 50.25, - "MassiveIntentClassification (sv)": 52.41, - "MassiveIntentClassification (sw)": 19.29, - "MassiveIntentClassification (ta)": 3.79, - "MassiveIntentClassification (te)": 3.36, - "MassiveIntentClassification (th)": 45.28, - "MassiveIntentClassification (tl)": 28.44, - "MassiveIntentClassification (tr)": 50.47, - "MassiveIntentClassification (ur)": 46.03, - "MassiveIntentClassification (vi)": 45.25, - "MassiveIntentClassification (zh-CN)": 59.22, - "MassiveIntentClassification (zh-TW)": 54.96, - "MassiveScenarioClassification (af)": 53.67, - "MassiveScenarioClassification (am)": 7.72, - "MassiveScenarioClassification (ar)": 52.19, - "MassiveScenarioClassification (az)": 34.75, - "MassiveScenarioClassification (bn)": 10.65, - "MassiveScenarioClassification (cy)": 21.24, - "MassiveScenarioClassification (da)": 62.55, - "MassiveScenarioClassification (de)": 61.4, - "MassiveScenarioClassification (el)": 60.68, - "MassiveScenarioClassification (en)": 74.0, - "MassiveScenarioClassification (es)": 64.61, - "MassiveScenarioClassification (fa)": 59.24, - "MassiveScenarioClassification (fi)": 54.66, - "MassiveScenarioClassification (fr)": 65.2, - "MassiveScenarioClassification (he)": 54.74, - "MassiveScenarioClassification (hi)": 55.99, - "MassiveScenarioClassification (hu)": 61.2, - "MassiveScenarioClassification (hy)": 49.63, - "MassiveScenarioClassification (id)": 65.25, - "MassiveScenarioClassification (is)": 22.6, - "MassiveScenarioClassification (it)": 64.63, - "MassiveScenarioClassification (ja)": 62.32, - "MassiveScenarioClassification (jv)": 35.77, - "MassiveScenarioClassification (ka)": 39.08, - "MassiveScenarioClassification (km)": 9.24, - "MassiveScenarioClassification (kn)": 8.28, - "MassiveScenarioClassification (ko)": 57.6, - "MassiveScenarioClassification (lv)": 51.72, - "MassiveScenarioClassification (ml)": 8.25, - "MassiveScenarioClassification (mn)": 47.21, - "MassiveScenarioClassification (ms)": 55.65, - "MassiveScenarioClassification (my)": 43.31, - "MassiveScenarioClassification (nb)": 54.98, - "MassiveScenarioClassification (nl)": 67.49, - "MassiveScenarioClassification (pl)": 61.29, - "MassiveScenarioClassification (pt)": 64.26, - "MassiveScenarioClassification (ro)": 58.03, - "MassiveScenarioClassification (ru)": 65.41, - "MassiveScenarioClassification (sl)": 59.36, - "MassiveScenarioClassification (sq)": 62.69, - "MassiveScenarioClassification (sv)": 64.35, - "MassiveScenarioClassification (sw)": 25.12, - "MassiveScenarioClassification (ta)": 8.67, - "MassiveScenarioClassification (te)": 7.82, - "MassiveScenarioClassification (th)": 54.65, - "MassiveScenarioClassification (tl)": 36.09, - "MassiveScenarioClassification (tr)": 60.89, - "MassiveScenarioClassification (ur)": 54.71, - "MassiveScenarioClassification (vi)": 55.15, - "MassiveScenarioClassification (zh-CN)": 66.44, - "MassiveScenarioClassification (zh-TW)": 62.89, - "PAC": 68.17, - "PolEmo2.0-IN": 48.84, - "PolEmo2.0-OUT": 30.0, - "ToxicConversationsClassification": 69.09, - "TweetSentimentExtractionClassification": 59.97 + "Model": "bge-m3", + "GeoreviewClassification": 48.27, + "HeadlineClassification": 70.32, + "InappropriatenessClassification": 59.87, + "KinopoiskClassification": 58.23, + "MassiveIntentClassification (ru)": 68.75, + "MassiveScenarioClassification (ru)": 73.42, + "RuReviewsClassification": 66.91, + "RuSciBenchGRNTIClassification": 55.81, + "RuSciBenchOECDClassification": 42.57 } ] }, "Clustering": { "v_measure": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "8TagsClustering": 12.51, - "AlloProfClusteringP2P": 55.95, - "AlloProfClusteringS2S": 35.39, - "ArxivClusteringP2P": 33.59, - "HALClusteringS2S": 18.2, - "MLSUMClusteringP2P": 40.17, - "MLSUMClusteringS2S": 34.65, - "MasakhaNEWSClusteringP2P (fra)": 53.76, - "MasakhaNEWSClusteringS2S (fra)": 32.76 + "Model": "bge-m3", + "GeoreviewClusteringP2P": 63.09, + "RuSciBenchGRNTIClusteringP2P": 50.83, + "RuSciBenchOECDClusteringP2P": 43.21 } ] }, "PairClassification": { "max_ap": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "CDSC-E": 71.83, - "OpusparcusPC (fr)": 92.07, - "PPC": 86.83, - "PSC": 96.35, - "PawsXPairClassification (fr)": 51.08, - "SICK-E-PL": 62.05, - "SprintDuplicateQuestions": 87.15, - "TwitterSemEval2015": 61.67, - "TwitterURLCorpus": 84.02 + "Model": "bge-m3", + "OpusparcusPC (ru)": 89.64, + "TERRa": 60.6 }, { - "Model": "distiluse-base-multilingual-cased-v2", - "CDSC-E": 72.0, - "OpusparcusPC (fr)": 92.07, - "PPC": 86.83, - "PSC": 96.35, - "PawsXPairClassification (fr)": 51.16, - "SICK-E-PL": 62.41, - "SprintDuplicateQuestions": 88.43, - "TwitterSemEval2015": 62.46, - "TwitterURLCorpus": 84.02 + "Model": "bge-m3", + "OpusparcusPC (ru)": 89.64, + "TERRa": 60.6 } ] }, "Reranking": { "map": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "AlloprofReranking": 51.77, - "AskUbuntuDupQuestions": 53.75, - "MindSmallReranking": 30.39, - "SciDocsRR": 69.22, - "StackOverflowDupQuestions": 41.92, - "SyntecReranking": 74.78 + "Model": "bge-m3", + "MIRACLReranking (ru)": 65.38 + }, + { + "Model": "bge-m3", + "RuBQReranking": 74.03 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "AlloprofRetrieval": 26.99, - "ArguAna-PL": 36.7, - "BSARDRetrieval": 0.0, - "DBPedia-PL": 12.36, - "FiQA-PL": 8.02, - "HotpotQA-PL": 20.83, - "MSMARCO-PL": 4.57, - "MintakaRetrieval (fr)": 22.55, - "NFCorpus-PL": 16.28, - "NQ-PL": 5.85, - "Quora-PL": 71.95, - "SCIDOCS-PL": 6.5, - "SciFact-PL": 33.03, - "SyntecRetrieval": 65.34, - "TRECCOVID-PL": 16.91, - "XPQARetrieval (fr)": 51.2 + "Model": "bge-m3", + "ARCChallenge": 9.02, + "AlphaNLI": 24.73, + "HellaSwag": 25.67, + "LEMBNarrativeQARetrieval": 45.76, + "LEMBQMSumRetrieval": 35.54, + "LEMBSummScreenFDRetrieval": 94.09, + "LEMBWikimQARetrieval": 77.73, + "MIRACLRetrieval (ru)": 70.16, + "PIQA": 22.93, + "Quail": 7.51, + "RARbCode": 38.8, + "RARbMath": 69.19, + "RiaNewsRetrieval": 82.99, + "RuBQRetrieval": 71.22, + "SIQA": 4.89, + "SpartQA": 7.49, + "TempReasonL1": 0.99, + "TempReasonL2Fact": 33.23, + "TempReasonL2Pure": 0.68, + "TempReasonL3Fact": 30.05, + "TempReasonL3Pure": 5.28, + "WinoGrande": 41.72 } ] }, "STS": { "cosine_spearman": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "BIOSSES": 78.34, - "CDSC-R": 87.67, - "SICK-R": 75.25, - "SICK-R-PL": 65.53, - "SICKFr": 72.49, - "STS12": 72.96, - "STS13": 70.58, - "STS14": 70.29, - "STS15": 81.94, - "STS16": 76.8, - "STS17 (ar-ar)": 77.34, - "STS17 (en-ar)": 77.46, - "STS17 (en-de)": 80.24, - "STS17 (en-en)": 86.19, - "STS17 (en-tr)": 74.34, - "STS17 (es-en)": 77.4, - "STS17 (es-es)": 83.71, - "STS17 (fr-en)": 79.28, - "STS17 (it-en)": 80.82, - "STS17 (ko-ko)": 76.4, - "STS17 (nl-en)": 80.51, - "STS22 (ar)": 49.04, - "STS22 (de)": 35.73, - "STS22 (de-en)": 47.51, - "STS22 (de-fr)": 60.76, - "STS22 (de-pl)": 36.09, - "STS22 (en)": 62.88, - "STS22 (es)": 59.34, - "STS22 (es-en)": 68.96, - "STS22 (es-it)": 63.28, - "STS22 (fr)": 76.41, - "STS22 (fr-pl)": 61.98, - "STS22 (it)": 65.1, - "STS22 (pl)": 34.58, - "STS22 (pl-en)": 71.33, - "STS22 (ru)": 52.4, - "STS22 (tr)": 54.07, - "STS22 (zh)": 54.32, - "STS22 (zh-en)": 61.75, - "STSBenchmark": 80.75, - "STSBenchmarkMultilingualSTS (fr)": 77.49 + "Model": "bge-m3", + "RUParaPhraserSTS": 74.9, + "RuSTSBenchmarkSTS": 79.87, + "STS22 (ru)": 66.26, + "STSBenchmarkMultilingualSTS (ru)": 79.27 } ] }, "Summarization": { - "cosine_spearman": [ + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [ { - "Model": "distiluse-base-multilingual-cased-v2", - "SummEvalFr": 28.12 + "Model": "bge-m3", + "CEDRClassification": 43.47, + "SensitiveTopicsClassification": 26.25 } ] }, - "MultilabelClassification": { - "accuracy": [] - }, "InstructionRetrieval": { "p-MRR": [] } }, - "gte-Qwen2-7B-instruct": { + "bge-m3-instruct": { "BitextMining": { "f1": [] }, @@ -6869,32 +7322,73 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "gte-Qwen2-7B-instruct", - "BrightRetrieval (earth_science)": 40.66, - "BrightRetrieval (sustainable_living)": 20.82, - "BrightRetrieval (theoremqa_theorems)": 34.22, - "BrightRetrieval (aops)": 15.1, - "BrightRetrieval (economics)": 16.18, - "BrightRetrieval (pony)": 1.25, - "BrightRetrieval (stackoverflow)": 13.95, - "BrightRetrieval (leetcode)": 31.07, - "BrightRetrieval (biology)": 32.09, - "BrightRetrieval (theoremqa_questions)": 29.9, - "BrightRetrieval (robotics)": 12.82, - "BrightRetrieval (psychology)": 26.58 + "Model": "bge-m3-instruct", + "ARCChallenge": 9.03, + "AlphaNLI": 24.69, + "HellaSwag": 25.55, + "PIQA": 19.03, + "Quail": 7.08, + "RARbCode": 39.58, + "RARbMath": 64.51, + "SIQA": 4.77, + "SpartQA": 7.0, + "TempReasonL1": 0.8, + "TempReasonL2Fact": 34.99, + "TempReasonL2Pure": 0.62, + "TempReasonL3Fact": 32.47, + "TempReasonL3Pure": 7.01, + "WinoGrande": 35.33 } - ], - "recall_at_1": [ - { - "Model": "gte-Qwen2-7B-instruct", - "BrightRetrieval (psychology)": 46.73, - "BrightRetrieval (biology)": 34.87, - "BrightRetrieval (sustainable_living)": 31.28, - "BrightRetrieval (robotics)": 10.89, - "BrightRetrieval (pony)": 1.17, - "BrightRetrieval (earth_science)": 38.36, - "BrightRetrieval (stackoverflow)": 16.67, - "BrightRetrieval (economics)": 27.67 + ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "bge-small-en-v1.5": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-small-en-v1.5", + "ARCChallenge": 8.95, + "AlphaNLI": 11.64, + "HellaSwag": 25.44, + "PIQA": 23.92, + "Quail": 1.75, + "RARbCode": 42.36, + "RARbMath": 44.98, + "SIQA": 0.77, + "SpartQA": 3.55, + "TempReasonL1": 1.41, + "TempReasonL2Fact": 17.56, + "TempReasonL2Pure": 1.05, + "TempReasonL3Fact": 13.88, + "TempReasonL3Pure": 4.76, + "WinoGrande": 10.28 } ] }, @@ -6911,1543 +7405,494 @@ "p-MRR": [] } }, - "GritLM-7B": { + "bge-small-en-v1.5-instruct": { "BitextMining": { - "f1": [ + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "GritLM-7B", - "BornholmBitextMining (dan-Latn)": 61.17, - "Tatoeba (hye-Armn_eng-Latn)": 54.7, - "Tatoeba (afr-Latn_eng-Latn)": 91.82, - "Tatoeba (vie-Latn_eng-Latn)": 96.18, - "Tatoeba (cym-Latn_eng-Latn)": 75.08, - "Tatoeba (xho-Latn_eng-Latn)": 42.31, - "Tatoeba (uzb-Latn_eng-Latn)": 59.94, - "Tatoeba (gsw-Latn_eng-Latn)": 59.66, - "Tatoeba (mal-Mlym_eng-Latn)": 52.3, - "Tatoeba (ben-Beng_eng-Latn)": 78.41, - "Tatoeba (spa-Latn_eng-Latn)": 98.62, - "Tatoeba (arq-Arab_eng-Latn)": 43.42, - "Tatoeba (hin-Deva_eng-Latn)": 93.86, - "Tatoeba (yue-Hant_eng-Latn)": 88.43, - "Tatoeba (nld-Latn_eng-Latn)": 96.35, - "Tatoeba (hun-Latn_eng-Latn)": 94.2, - "Tatoeba (tha-Thai_eng-Latn)": 92.55, - "Tatoeba (ber-Tfng_eng-Latn)": 7.88, - "Tatoeba (cor-Latn_eng-Latn)": 10.97, - "Tatoeba (kab-Latn_eng-Latn)": 3.21, - "Tatoeba (mon-Cyrl_eng-Latn)": 38.62, - "Tatoeba (pes-Arab_eng-Latn)": 90.25, - "Tatoeba (ind-Latn_eng-Latn)": 93.53, - "Tatoeba (fry-Latn_eng-Latn)": 73.7, - "Tatoeba (dan-Latn_eng-Latn)": 95.6, - "Tatoeba (ron-Latn_eng-Latn)": 94.53, - "Tatoeba (dsb-Latn_eng-Latn)": 69.41, - "Tatoeba (nds-Latn_eng-Latn)": 82.66, - "Tatoeba (jav-Latn_eng-Latn)": 40.36, - "Tatoeba (ast-Latn_eng-Latn)": 86.35, - "Tatoeba (lfn-Latn_eng-Latn)": 79.93, - "Tatoeba (mkd-Cyrl_eng-Latn)": 88.68, - "Tatoeba (kzj-Latn_eng-Latn)": 12.2, - "Tatoeba (ido-Latn_eng-Latn)": 82.17, - "Tatoeba (eus-Latn_eng-Latn)": 44.09, - "Tatoeba (ita-Latn_eng-Latn)": 94.54, - "Tatoeba (orv-Cyrl_eng-Latn)": 63.76, - "Tatoeba (zsm-Latn_eng-Latn)": 93.39, - "Tatoeba (mar-Deva_eng-Latn)": 68.94, - "Tatoeba (slv-Latn_eng-Latn)": 91.48, - "Tatoeba (hsb-Latn_eng-Latn)": 79.62, - "Tatoeba (ile-Latn_eng-Latn)": 91.1, - "Tatoeba (ara-Arab_eng-Latn)": 89.5, - "Tatoeba (yid-Hebr_eng-Latn)": 31.64, - "Tatoeba (swh-Latn_eng-Latn)": 64.97, - "Tatoeba (khm-Khmr_eng-Latn)": 32.74, - "Tatoeba (arz-Arab_eng-Latn)": 68.47, - "Tatoeba (amh-Ethi_eng-Latn)": 11.66, - "Tatoeba (max-Deva_eng-Latn)": 66.33, - "Tatoeba (pms-Latn_eng-Latn)": 70.24, - "Tatoeba (kat-Geor_eng-Latn)": 58.44, - "Tatoeba (ang-Latn_eng-Latn)": 83.83, - "Tatoeba (fra-Latn_eng-Latn)": 94.5, - "Tatoeba (ukr-Cyrl_eng-Latn)": 94.35, - "Tatoeba (wuu-Hans_eng-Latn)": 90.31, - "Tatoeba (pol-Latn_eng-Latn)": 97.35, - "Tatoeba (tzl-Latn_eng-Latn)": 62.95, - "Tatoeba (awa-Deva_eng-Latn)": 70.28, - "Tatoeba (isl-Latn_eng-Latn)": 88.47, - "Tatoeba (hrv-Latn_eng-Latn)": 95.43, - "Tatoeba (bre-Latn_eng-Latn)": 19.19, - "Tatoeba (cmn-Hans_eng-Latn)": 95.48, - "Tatoeba (gla-Latn_eng-Latn)": 65.28, - "Tatoeba (ces-Latn_eng-Latn)": 96.03, - "Tatoeba (est-Latn_eng-Latn)": 69.22, - "Tatoeba (aze-Latn_eng-Latn)": 82.05, - "Tatoeba (nov-Latn_eng-Latn)": 78.53, - "Tatoeba (ina-Latn_eng-Latn)": 96.35, - "Tatoeba (cha-Latn_eng-Latn)": 47.58, - "Tatoeba (kaz-Cyrl_eng-Latn)": 47.61, - "Tatoeba (fin-Latn_eng-Latn)": 93.35, - "Tatoeba (deu-Latn_eng-Latn)": 99.47, - "Tatoeba (kor-Hang_eng-Latn)": 91.4, - "Tatoeba (tam-Taml_eng-Latn)": 66.66, - "Tatoeba (mhr-Cyrl_eng-Latn)": 13.31, - "Tatoeba (tur-Latn_eng-Latn)": 95.13, - "Tatoeba (sqi-Latn_eng-Latn)": 73.59, - "Tatoeba (por-Latn_eng-Latn)": 94.45, - "Tatoeba (epo-Latn_eng-Latn)": 90.0, - "Tatoeba (jpn-Jpan_eng-Latn)": 95.58, - "Tatoeba (tgl-Latn_eng-Latn)": 94.29, - "Tatoeba (swg-Latn_eng-Latn)": 73.87, - "Tatoeba (ceb-Latn_eng-Latn)": 45.91, - "Tatoeba (cat-Latn_eng-Latn)": 94.15, - "Tatoeba (nob-Latn_eng-Latn)": 98.0, - "Tatoeba (gle-Latn_eng-Latn)": 71.65, - "Tatoeba (nno-Latn_eng-Latn)": 90.49, - "Tatoeba (war-Latn_eng-Latn)": 41.74, - "Tatoeba (slk-Latn_eng-Latn)": 93.92, - "Tatoeba (urd-Arab_eng-Latn)": 83.82, - "Tatoeba (bos-Latn_eng-Latn)": 95.9, - "Tatoeba (bel-Cyrl_eng-Latn)": 90.07, - "Tatoeba (heb-Hebr_eng-Latn)": 82.14, - "Tatoeba (glg-Latn_eng-Latn)": 94.18, - "Tatoeba (srp-Cyrl_eng-Latn)": 94.23, - "Tatoeba (oci-Latn_eng-Latn)": 71.66, - "Tatoeba (ell-Grek_eng-Latn)": 91.56, - "Tatoeba (kur-Latn_eng-Latn)": 35.06, - "Tatoeba (fao-Latn_eng-Latn)": 83.02, - "Tatoeba (tat-Cyrl_eng-Latn)": 39.47, - "Tatoeba (lvs-Latn_eng-Latn)": 72.89, - "Tatoeba (csb-Latn_eng-Latn)": 69.78, - "Tatoeba (dtp-Latn_eng-Latn)": 10.77, - "Tatoeba (pam-Latn_eng-Latn)": 15.22, - "Tatoeba (tel-Telu_eng-Latn)": 37.93, - "Tatoeba (bul-Cyrl_eng-Latn)": 93.5, - "Tatoeba (tuk-Latn_eng-Latn)": 48.38, - "Tatoeba (cbk-Latn_eng-Latn)": 83.83, - "Tatoeba (lit-Latn_eng-Latn)": 77.91, - "Tatoeba (swe-Latn_eng-Latn)": 94.95, - "Tatoeba (uig-Arab_eng-Latn)": 40.4, - "Tatoeba (lat-Latn_eng-Latn)": 91.56, - "Tatoeba (rus-Cyrl_eng-Latn)": 93.93 + "Model": "bge-small-en-v1.5-instruct", + "ARCChallenge": 7.72, + "AlphaNLI": 1.26, + "HellaSwag": 23.41, + "PIQA": 20.79, + "Quail": 2.01, + "RARbCode": 41.52, + "RARbMath": 46.5, + "SIQA": 0.98, + "SpartQA": 2.86, + "TempReasonL1": 1.27, + "TempReasonL2Fact": 16.72, + "TempReasonL2Pure": 1.1, + "TempReasonL3Fact": 12.81, + "TempReasonL3Pure": 4.63, + "WinoGrande": 5.35 } ] }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "bge-small-zh-v1.5": { + "BitextMining": { + "f1": [] + }, "Classification": { "accuracy": [ { - "Model": "GritLM-7B", - "AllegroReviews (pol-Latn)": 56.77, - "AmazonCounterfactualClassification (en-ext)": 79.61, - "AmazonCounterfactualClassification (en)": 80.54, - "AmazonCounterfactualClassification (deu-Latn)": 74.94, - "AmazonCounterfactualClassification (jpn-Jpan)": 81.6, - "AmazonPolarityClassification": 96.64, - "AmazonReviewsClassification (en)": 59.91, - "AmazonReviewsClassification (deu-Latn)": 58.21, - "AmazonReviewsClassification (spa-Latn)": 55.74, - "AmazonReviewsClassification (fra-Latn)": 54.43, - "AmazonReviewsClassification (jpn-Jpan)": 54.51, - "AmazonReviewsClassification (cmn-Hans)": 51.01, - "AngryTweetsClassification (dan-Latn)": 65.35, - "Banking77Classification": 84.44, - "CBD (pol-Latn)": 77.23, - "DanishPoliticalCommentsClassification (dan-Latn)": 41.28, - "EmotionClassification": 56.01, - "GeoreviewClassification (rus-Cyrl)": 53.47, - "HeadlineClassification (rus-Cyrl)": 85.66, - "IFlyTek (cmn-Hans)": 52.7, - "ImdbClassification": 95.07, - "InappropriatenessClassification (rus-Cyrl)": 65.29, - "JDReview (cmn-Hans)": 87.65, - "KinopoiskClassification (rus-Cyrl)": 64.25, - "LccSentimentClassification (dan-Latn)": 70.13, - "MTOPDomainClassification (en)": 95.37, - "MTOPDomainClassification (deu-Latn)": 93.51, - "MTOPDomainClassification (spa-Latn)": 92.78, - "MTOPDomainClassification (fra-Latn)": 92.25, - "MTOPDomainClassification (hin-Deva)": 85.26, - "MTOPDomainClassification (tha-Thai)": 83.35, - "MTOPIntentClassification (en)": 81.23, - "MTOPIntentClassification (deu-Latn)": 79.28, - "MTOPIntentClassification (spa-Latn)": 81.2, - "MTOPIntentClassification (fra-Latn)": 76.87, - "MTOPIntentClassification (hin-Deva)": 63.66, - "MTOPIntentClassification (tha-Thai)": 69.59, - "MasakhaNEWSClassification (amh-Ethi)": 53.83, - "MasakhaNEWSClassification (eng)": 80.47, - "MasakhaNEWSClassification (fra-Latn)": 81.35, - "MasakhaNEWSClassification (hau-Latn)": 75.4, - "MasakhaNEWSClassification (ibo-Latn)": 74.18, - "MasakhaNEWSClassification (lin-Latn)": 77.43, - "MasakhaNEWSClassification (lug-Latn)": 72.38, - "MasakhaNEWSClassification (orm-Ethi)": 79.02, - "MasakhaNEWSClassification (pcm-Latn)": 94.79, - "MasakhaNEWSClassification (run-Latn)": 80.81, - "MasakhaNEWSClassification (sna-Latn)": 87.29, - "MasakhaNEWSClassification (som-Latn)": 65.82, - "MasakhaNEWSClassification (swa-Latn)": 75.06, - "MasakhaNEWSClassification (tir-Ethi)": 35.07, - "MasakhaNEWSClassification (xho-Latn)": 84.01, - "MasakhaNEWSClassification (yor-Latn)": 83.82, - "MassiveIntentClassification (kan-Knda)": 48.65, - "MassiveIntentClassification (isl-Latn)": 59.76, - "MassiveIntentClassification (swa-Latn)": 54.04, - "MassiveIntentClassification (amh-Ethi)": 36.93, - "MassiveIntentClassification (fra-Latn)": 75.51, - "MassiveIntentClassification (hye-Armn)": 47.26, - "MassiveIntentClassification (spa-Latn)": 75.14, - "MassiveIntentClassification (ind-Latn)": 72.84, - "MassiveIntentClassification (ita-Latn)": 76.46, - "MassiveIntentClassification (cmo-Hans)": 75.69, - "MassiveIntentClassification (cmo-Hant)": 70.86, - "MassiveIntentClassification (mal-Mlym)": 46.31, - "MassiveIntentClassification (msa-Latn)": 70.72, - "MassiveIntentClassification (jav-Latn)": 55.89, - "MassiveIntentClassification (tel-Telu)": 46.92, - "MassiveIntentClassification (urd-Arab)": 59.32, - "MassiveIntentClassification (vie-Latn)": 69.02, - "MassiveIntentClassification (mon-Cyrl)": 44.24, - "MassiveIntentClassification (lav-Latn)": 57.35, - "MassiveIntentClassification (tha-Thai)": 63.08, - "MassiveIntentClassification (hun-Latn)": 70.42, - "MassiveIntentClassification (afr-Latn)": 67.3, - "MassiveIntentClassification (por-Latn)": 76.08, - "MassiveIntentClassification (sqi-Latn)": 56.51, - "MassiveIntentClassification (heb-Hebr)": 63.87, - "MassiveIntentClassification (rus-Cyrl)": 76.01, - "MassiveIntentClassification (dan-Latn)": 73.46, - "MassiveIntentClassification (cym-Latn)": 53.51, - "MassiveIntentClassification (jpn-Jpan)": 75.19, - "MassiveIntentClassification (en)": 79.1, - "MassiveIntentClassification (fin-Latn)": 67.91, - "MassiveIntentClassification (aze-Latn)": 62.1, - "MassiveIntentClassification (ara-Arab)": 60.26, - "MassiveIntentClassification (hin-Deva)": 66.02, - "MassiveIntentClassification (swe-Latn)": 75.29, - "MassiveIntentClassification (ell-Grek)": 67.04, - "MassiveIntentClassification (khm-Khmr)": 42.63, - "MassiveIntentClassification (kor-Kore)": 71.38, - "MassiveIntentClassification (ron-Latn)": 70.1, - "MassiveIntentClassification (tam-Taml)": 48.79, - "MassiveIntentClassification (nld-Latn)": 75.5, - "MassiveIntentClassification (kat-Geor)": 46.6, - "MassiveIntentClassification (nob-Latn)": 70.92, - "MassiveIntentClassification (tur-Latn)": 70.73, - "MassiveIntentClassification (pol-Latn)": 75.36, - "MassiveIntentClassification (ben-Beng)": 57.82, - "MassiveIntentClassification (slv-Latn)": 69.98, - "MassiveIntentClassification (tgl-Latn)": 67.86, - "MassiveIntentClassification (mya-Mymr)": 40.8, - "MassiveIntentClassification (deu-Latn)": 74.8, - "MassiveIntentClassification (fas-Arab)": 70.45, - "MassiveScenarioClassification (nob-Latn)": 75.62, - "MassiveScenarioClassification (sqi-Latn)": 62.49, - "MassiveScenarioClassification (slv-Latn)": 74.71, - "MassiveScenarioClassification (deu-Latn)": 78.58, - "MassiveScenarioClassification (fin-Latn)": 70.33, - "MassiveScenarioClassification (ben-Beng)": 61.88, - "MassiveScenarioClassification (heb-Hebr)": 67.52, - "MassiveScenarioClassification (ell-Grek)": 70.08, - "MassiveScenarioClassification (vie-Latn)": 72.94, - "MassiveScenarioClassification (hun-Latn)": 73.71, - "MassiveScenarioClassification (mal-Mlym)": 51.24, - "MassiveScenarioClassification (msa-Latn)": 73.8, - "MassiveScenarioClassification (hin-Deva)": 69.18, - "MassiveScenarioClassification (isl-Latn)": 66.04, - "MassiveScenarioClassification (tel-Telu)": 52.39, - "MassiveScenarioClassification (swa-Latn)": 62.19, - "MassiveScenarioClassification (tgl-Latn)": 71.83, - "MassiveScenarioClassification (swe-Latn)": 78.52, - "MassiveScenarioClassification (en)": 80.37, - "MassiveScenarioClassification (jpn-Jpan)": 77.49, - "MassiveScenarioClassification (aze-Latn)": 65.13, - "MassiveScenarioClassification (fra-Latn)": 77.29, - "MassiveScenarioClassification (ita-Latn)": 77.88, - "MassiveScenarioClassification (kor-Kore)": 75.95, - "MassiveScenarioClassification (cmo-Hans)": 78.47, - "MassiveScenarioClassification (ind-Latn)": 75.94, - "MassiveScenarioClassification (kan-Knda)": 55.06, - "MassiveScenarioClassification (cym-Latn)": 60.36, - "MassiveScenarioClassification (amh-Ethi)": 44.74, - "MassiveScenarioClassification (hye-Armn)": 53.06, - "MassiveScenarioClassification (khm-Khmr)": 50.11, - "MassiveScenarioClassification (por-Latn)": 76.23, - "MassiveScenarioClassification (cmo-Hant)": 75.14, - "MassiveScenarioClassification (rus-Cyrl)": 78.28, - "MassiveScenarioClassification (tam-Taml)": 54.59, - "MassiveScenarioClassification (spa-Latn)": 77.05, - "MassiveScenarioClassification (mon-Cyrl)": 51.04, - "MassiveScenarioClassification (tha-Thai)": 67.74, - "MassiveScenarioClassification (urd-Arab)": 65.78, - "MassiveScenarioClassification (mya-Mymr)": 46.9, - "MassiveScenarioClassification (fas-Arab)": 73.52, - "MassiveScenarioClassification (afr-Latn)": 72.84, - "MassiveScenarioClassification (nld-Latn)": 77.79, - "MassiveScenarioClassification (tur-Latn)": 71.9, - "MassiveScenarioClassification (ara-Arab)": 67.23, - "MassiveScenarioClassification (pol-Latn)": 76.25, - "MassiveScenarioClassification (ron-Latn)": 72.54, - "MassiveScenarioClassification (jav-Latn)": 62.33, - "MassiveScenarioClassification (dan-Latn)": 76.98, - "MassiveScenarioClassification (lav-Latn)": 61.54, - "MassiveScenarioClassification (kat-Geor)": 54.44, - "MultilingualSentiment (cmn-Hans)": 75.51, - "NoRecClassification (nob-Latn)": 56.05, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 69.36, - "OnlineShopping (cmn-Hans)": 93.09, - "PAC (pol-Latn)": 67.05, - "PolEmo2.0-IN (pol-Latn)": 83.5, - "PolEmo2.0-OUT (pol-Latn)": 62.55, - "RuReviewsClassification (rus-Cyrl)": 68.58, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 64.56, - "RuSciBenchOECDClassification (rus-Cyrl)": 51.2, - "TNews (cmn-Hans)": 51.99, - "ToxicConversationsClassification": 68.81, - "TweetSentimentExtractionClassification": 66.26, - "Waimai (cmn-Hans)": 87.91 + "Model": "bge-small-zh-v1.5", + "AmazonReviewsClassification": 35.91, + "IFlyTek": 45.49, + "JDReview": 80.04, + "MassiveIntentClassification": 63.95, + "MassiveScenarioClassification": 70.8, + "MultilingualSentiment": 63.06, + "OnlineShopping": 85.05, + "TNews": 48.15, + "Waimai": 83.18 } ] }, "Clustering": { "v_measure": [ { - "Model": "GritLM-7B", - "GeoreviewClusteringP2P (rus-Cyrl)": 74.06, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 45.1, - "MasakhaNEWSClusteringP2P (eng)": 70.5, - "MasakhaNEWSClusteringP2P (fra-Latn)": 73.54, - "MasakhaNEWSClusteringP2P (hau-Latn)": 59.75, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 66.75, - "MasakhaNEWSClusteringP2P (lin-Latn)": 59.57, - "MasakhaNEWSClusteringP2P (lug-Latn)": 58.93, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 54.38, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 92.67, - "MasakhaNEWSClusteringP2P (run-Latn)": 59.74, - "MasakhaNEWSClusteringP2P (sna-Latn)": 68.86, - "MasakhaNEWSClusteringP2P (som-Latn)": 42.54, - "MasakhaNEWSClusteringP2P (swa-Latn)": 33.61, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 51.66, - "MasakhaNEWSClusteringP2P (xho-Latn)": 46.65, - "MasakhaNEWSClusteringP2P (yor-Latn)": 52.39, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 43.39, - "MasakhaNEWSClusteringS2S (eng)": 65.85, - "MasakhaNEWSClusteringS2S (fra-Latn)": 68.87, - "MasakhaNEWSClusteringS2S (hau-Latn)": 33.02, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 64.55, - "MasakhaNEWSClusteringS2S (lin-Latn)": 72.01, - "MasakhaNEWSClusteringS2S (lug-Latn)": 47.42, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 32.59, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 97.82, - "MasakhaNEWSClusteringS2S (run-Latn)": 59.41, - "MasakhaNEWSClusteringS2S (sna-Latn)": 71.58, - "MasakhaNEWSClusteringS2S (som-Latn)": 40.91, - "MasakhaNEWSClusteringS2S (swa-Latn)": 33.54, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 45.32, - "MasakhaNEWSClusteringS2S (xho-Latn)": 28.94, - "MasakhaNEWSClusteringS2S (yor-Latn)": 63.26, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 60.01, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 51.66 + "Model": "bge-small-zh-v1.5", + "CLSClusteringP2P": 38.14, + "CLSClusteringS2S": 35.14, + "ThuNewsClusteringP2P": 54.22, + "ThuNewsClusteringS2S": 49.22 } ] }, "PairClassification": { "max_ap": [ { - "Model": "GritLM-7B", - "CDSC-E (pol-Latn)": 75.61, - "OpusparcusPC (deu-Latn)": 97.43, - "OpusparcusPC (en)": 99.14, - "OpusparcusPC (fin-Latn)": 92.05, - "OpusparcusPC (fra-Latn)": 95.14, - "OpusparcusPC (rus-Cyrl)": 91.13, - "OpusparcusPC (swe-Latn)": 94.87, - "PSC (pol-Latn)": 99.42, - "PawsXPairClassification (deu-Latn)": 57.59, - "PawsXPairClassification (en)": 66.63, - "PawsXPairClassification (spa-Latn)": 58.9, - "PawsXPairClassification (fra-Latn)": 61.48, - "PawsXPairClassification (jpn-Hira)": 51.11, - "PawsXPairClassification (kor-Hang)": 51.9, - "PawsXPairClassification (cmn-Hans)": 57.86, - "SICK-E-PL (pol-Latn)": 79.06, - "SprintDuplicateQuestions": 93.0, - "TERRa (rus-Cyrl)": 58.86, - "TwitterSemEval2015": 81.08, - "TwitterURLCorpus": 87.4 + "Model": "bge-small-zh-v1.5", + "Cmnli": 76.24, + "Ocnli": 64.57 }, { - "Model": "GritLM-7B", - "CDSC-E (pol-Latn)": 75.61, - "OpusparcusPC (deu-Latn)": 97.43, - "OpusparcusPC (en)": 99.14, - "OpusparcusPC (fin-Latn)": 92.05, - "OpusparcusPC (fra-Latn)": 95.14, - "OpusparcusPC (rus-Cyrl)": 91.13, - "OpusparcusPC (swe-Latn)": 94.87, - "PSC (pol-Latn)": 99.45, - "PawsXPairClassification (deu-Latn)": 57.66, - "PawsXPairClassification (en)": 66.82, - "PawsXPairClassification (spa-Latn)": 59.04, - "PawsXPairClassification (fra-Latn)": 61.56, - "PawsXPairClassification (jpn-Hira)": 51.2, - "PawsXPairClassification (kor-Hang)": 51.92, - "PawsXPairClassification (cmn-Hans)": 57.88, - "SICK-E-PL (pol-Latn)": 79.06, - "SprintDuplicateQuestions": 93.35, - "TERRa (rus-Cyrl)": 58.93, - "TwitterSemEval2015": 81.08, - "TwitterURLCorpus": 87.43 + "Model": "bge-small-zh-v1.5", + "Cmnli": 76.24, + "Ocnli": 64.57 } ] }, "Reranking": { "map": [ { - "Model": "GritLM-7B", - "AlloprofReranking (fra-Latn)": 77.93, - "AskUbuntuDupQuestions": 67.37, - "MMarcoReranking (cmn-Hans)": 21.7, - "MindSmallReranking": 31.81, - "RuBQReranking (rus-Cyrl)": 72.43, - "SciDocsRR": 86.82, - "StackOverflowDupQuestions": 55.94, - "SyntecReranking (fra-Latn)": 92.62, - "T2Reranking (cmn-Hans)": 65.64 + "Model": "bge-small-zh-v1.5", + "CMedQAv1": 77.4, + "CMedQAv2": 79.86, + "MMarcoReranking": 20.5, + "T2Reranking": 65.9 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "GritLM-7B", - "AILACasedocs": 35.29, - "AILAStatutes": 41.8, - "ARCChallenge": 26.68, - "AlloprofRetrieval (fra-Latn)": 55.42, - "AlphaNLI": 34.0, - "AppsRetrieval (eng-Latn_python-Code)": 35.13, - "ArguAna": 63.17, - "ArguAna-PL (pol-Latn)": 48.96, - "BSARDRetrieval (fra-Latn)": 26.61, - "BrightRetrieval (pony)": 21.98, - "BrightRetrieval (robotics)": 17.31, - "BrightRetrieval (economics)": 19.0, - "BrightRetrieval (theoremqa_questions)": 23.34, - "BrightRetrieval (leetcode)": 29.85, - "BrightRetrieval (earth_science)": 32.77, - "BrightRetrieval (stackoverflow)": 11.62, - "BrightRetrieval (sustainable_living)": 18.04, - "BrightRetrieval (biology)": 25.04, - "BrightRetrieval (psychology)": 19.92, - "BrightRetrieval (theoremqa_theorems)": 19.75, - "BrightRetrieval (aops)": 8.91, - "CmedqaRetrieval (cmn-Hans)": 35.58, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 50.64, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 84.08, - "CodeSearchNetCCRetrieval (python-Code)": 90.92, - "CodeSearchNetCCRetrieval (javascript-Code)": 87.27, - "CodeSearchNetCCRetrieval (go-Code)": 85.44, - "CodeSearchNetCCRetrieval (ruby-Code)": 89.63, - "CodeSearchNetCCRetrieval (java-Code)": 87.63, - "CodeSearchNetCCRetrieval (php-Code)": 79.25, - "CodeSearchNetRetrieval (python-Code)": 91.65, - "CodeSearchNetRetrieval (javascript-Code)": 81.43, - "CodeSearchNetRetrieval (go-Code)": 90.01, - "CodeSearchNetRetrieval (ruby-Code)": 85.42, - "CodeSearchNetRetrieval (java-Code)": 89.84, - "CodeSearchNetRetrieval (php-Code)": 81.57, - "CodeTransOceanContest (python-Code_c++-Code)": 89.22, - "CodeTransOceanDL": 32.98, - "CosQA (eng-Latn_python-Code)": 31.24, - "CovidRetrieval (cmn-Hans)": 73.4, - "DuRetrieval (cmn-Hans)": 88.18, - "EcomRetrieval (cmn-Hans)": 54.33, - "FiQA-PL (pol-Latn)": 37.98, - "FiQA2018": 59.91, - "GerDaLIRSmall (deu-Latn)": 20.61, - "HellaSwag": 39.45, - "LEMBNarrativeQARetrieval": 41.45, - "LEMBQMSumRetrieval": 30.36, - "LEMBSummScreenFDRetrieval": 78.48, - "LEMBWikimQARetrieval": 60.77, - "LeCaRDv2 (zho-Hans)": 64.22, - "LegalBenchConsumerContractsQA": 82.05, - "LegalBenchCorporateLobbying": 95.0, - "LegalQuAD (deu-Latn)": 44.18, - "LegalSummarization": 70.64, - "MMarcoRetrieval (cmn-Hans)": 76.54, - "MedicalRetrieval (cmn-Hans)": 55.81, - "MintakaRetrieval (ara-Arab)": 25.88, - "MintakaRetrieval (deu-Latn)": 55.66, - "MintakaRetrieval (spa-Latn)": 53.36, - "MintakaRetrieval (fra-Latn)": 51.68, - "MintakaRetrieval (hin-Deva)": 26.06, - "MintakaRetrieval (ita-Latn)": 54.91, - "MintakaRetrieval (jpn-Hira)": 34.11, - "MintakaRetrieval (por-Latn)": 54.89, - "NFCorpus": 40.86, - "NFCorpus-PL (pol-Latn)": 32.85, - "PIQA": 44.35, - "Quail": 11.67, - "RARbCode": 84.02, - "RARbMath": 82.35, - "RuBQRetrieval (rus-Cyrl)": 70.94, - "SCIDOCS": 24.41, - "SCIDOCS-PL (pol-Latn)": 18.34, - "SIQA": 7.23, - "SciFact": 79.13, - "SciFact-PL (pol-Latn)": 73.22, - "SpartQA": 9.35, - "StackOverflowQA": 93.37, - "SyntecRetrieval (fra-Latn)": 89.48, - "SyntheticText2SQL (eng-Latn_sql-Code)": 60.39, - "T2Retrieval (cmn-Hans)": 82.96, - "TRECCOVID": 74.31, - "TRECCOVID-PL (pol-Latn)": 58.15, - "TempReasonL1": 7.16, - "TempReasonL2Fact": 58.39, - "TempReasonL2Pure": 11.22, - "TempReasonL3Fact": 44.29, - "TempReasonL3Pure": 14.15, - "Touche2020": 27.78, - "VideoRetrieval (cmn-Hans)": 53.85, - "WinoGrande": 53.7, - "XPQARetrieval (ara-Arab_ara-Arab)": 45.21, - "XPQARetrieval (eng-Latn_ara-Arab)": 27.34, - "XPQARetrieval (ara-Arab_eng-Latn)": 39.43, - "XPQARetrieval (deu-Latn_deu-Latn)": 76.58, - "XPQARetrieval (eng-Latn_deu-Latn)": 55.44, - "XPQARetrieval (deu-Latn_eng-Latn)": 72.56, - "XPQARetrieval (spa-Latn_spa-Latn)": 64.54, - "XPQARetrieval (eng-Latn_spa-Latn)": 45.5, - "XPQARetrieval (spa-Latn_eng-Latn)": 61.03, - "XPQARetrieval (fra-Latn_fra-Latn)": 70.84, - "XPQARetrieval (eng-Latn_fra-Latn)": 48.14, - "XPQARetrieval (fra-Latn_eng-Latn)": 66.96, - "XPQARetrieval (hin-Deva_hin-Deva)": 74.75, - "XPQARetrieval (eng-Latn_hin-Deva)": 25.62, - "XPQARetrieval (hin-Deva_eng-Latn)": 63.9, - "XPQARetrieval (ita-Latn_ita-Latn)": 76.53, - "XPQARetrieval (eng-Latn_ita-Latn)": 46.82, - "XPQARetrieval (ita-Latn_eng-Latn)": 71.03, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 72.28, - "XPQARetrieval (eng-Latn_jpn-Hira)": 41.9, - "XPQARetrieval (jpn-Hira_eng-Latn)": 69.42, - "XPQARetrieval (kor-Hang_kor-Hang)": 40.64, - "XPQARetrieval (eng-Latn_kor-Hang)": 32.68, - "XPQARetrieval (kor-Hang_eng-Latn)": 36.0, - "XPQARetrieval (pol-Latn_pol-Latn)": 50.75, - "XPQARetrieval (eng-Latn_pol-Latn)": 33.14, - "XPQARetrieval (pol-Latn_eng-Latn)": 48.06, - "XPQARetrieval (por-Latn_por-Latn)": 49.86, - "XPQARetrieval (eng-Latn_por-Latn)": 33.06, - "XPQARetrieval (por-Latn_eng-Latn)": 48.45, - "XPQARetrieval (tam-Taml_tam-Taml)": 41.78, - "XPQARetrieval (eng-Latn_tam-Taml)": 10.95, - "XPQARetrieval (tam-Taml_eng-Latn)": 21.26, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 65.29, - "XPQARetrieval (eng-Latn_cmn-Hans)": 35.86, - "XPQARetrieval (cmn-Hans_eng-Latn)": 58.12 - }, - { - "Model": "GritLM-7B", - "LEMBNeedleRetrieval": 33.25, - "LEMBPasskeyRetrieval": 38.25 - } - ], - "recall_at_1": [ - { - "Model": "GritLM-7B", - "BrightRetrieval (biology)": 37.46, - "BrightRetrieval (robotics)": 17.82, - "BrightRetrieval (pony)": 0.0, - "BrightRetrieval (sustainable_living)": 32.36, - "BrightRetrieval (psychology)": 35.35, - "BrightRetrieval (stackoverflow)": 20.08, - "BrightRetrieval (earth_science)": 39.44, - "BrightRetrieval (economics)": 25.73 + "Model": "bge-small-zh-v1.5", + "CmedqaRetrieval": 35.11, + "CovidRetrieval": 70.14, + "DuRetrieval": 77.28, + "EcomRetrieval": 55.71, + "MMarcoRetrieval": 63.48, + "MedicalRetrieval": 49.8, + "T2Retrieval": 76.43, + "VideoRetrieval": 66.19 } ] }, "STS": { "cosine_spearman": [ { - "Model": "GritLM-7B", - "AFQMC (cmn-Hans)": 35.59, - "ATEC (cmn-Hans)": 40.89, - "BIOSSES": 86.32, - "BQ (cmn-Hans)": 49.18, - "CDSC-R (pol-Latn)": 93.38, - "LCQMC (cmn-Hans)": 75.52, - "PAWSX (cmn-Hans)": 16.4, - "RUParaPhraserSTS (rus-Cyrl)": 74.37, - "RuSTSBenchmarkSTS (rus-Cyrl)": 81.07, - "SICK-R": 83.13, - "SICK-R-PL (pol-Latn)": 76.45, - "SICKFr (fra-Latn)": 80.25, - "STS12": 77.34, - "STS13": 85.04, - "STS14": 82.91, - "STS15": 88.13, - "STS16": 86.24, - "STS17 (ita-Latn_eng-Latn)": 88.43, - "STS17 (nld-Latn_eng-Latn)": 88.29, - "STS17 (eng-Latn_tur-Latn)": 77.48, - "STS17 (fra-Latn_eng-Latn)": 87.9, - "STS17 (eng-Latn_deu-Latn)": 88.93, - "STS17 (ara-Arab)": 79.26, - "STS17 (kor-Hang)": 78.74, - "STS17 (eng-Latn_ara-Arab)": 74.46, - "STS17 (en-en)": 90.14, - "STS17 (spa-Latn_eng-Latn)": 87.47, - "STS17 (spa-Latn)": 87.12, - "STSB (cmn-Hans)": 78.12, - "STSBenchmark": 85.64, - "STSBenchmarkMultilingualSTS (en)": 85.65, - "STSBenchmarkMultilingualSTS (fra-Latn)": 81.96, - "STSBenchmarkMultilingualSTS (pol-Latn)": 80.21, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 80.84, - "STSBenchmarkMultilingualSTS (nld-Latn)": 80.37, - "STSBenchmarkMultilingualSTS (spa-Latn)": 82.81, - "STSBenchmarkMultilingualSTS (por-Latn)": 80.98, - "STSBenchmarkMultilingualSTS (deu-Latn)": 82.08, - "STSBenchmarkMultilingualSTS (ita-Latn)": 81.69, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 79.73 - }, - { - "Model": "GritLM-7B", - "AFQMC (cmn-Hans)": 35.59, - "ATEC (cmn-Hans)": 40.89, - "BIOSSES": 86.32, - "BQ (cmn-Hans)": 49.18, - "CDSC-R (pol-Latn)": 93.38, - "LCQMC (cmn-Hans)": 75.52, - "PAWSX (cmn-Hans)": 16.4, - "RUParaPhraserSTS (rus-Cyrl)": 74.37, - "RuSTSBenchmarkSTS (rus-Cyrl)": 81.07, - "SICK-R": 83.13, - "SICK-R-PL (pol-Latn)": 76.45, - "SICKFr (fra-Latn)": 80.25, - "STS12": 77.34, - "STS13": 85.04, - "STS14": 82.91, - "STS15": 88.13, - "STS16": 86.24, - "STS17 (ita-Latn_eng-Latn)": 88.43, - "STS17 (nld-Latn_eng-Latn)": 88.29, - "STS17 (eng-Latn_tur-Latn)": 77.48, - "STS17 (fra-Latn_eng-Latn)": 87.9, - "STS17 (eng-Latn_deu-Latn)": 88.93, - "STS17 (ara-Arab)": 79.26, - "STS17 (kor-Hang)": 78.74, - "STS17 (eng-Latn_ara-Arab)": 74.46, - "STS17 (en-en)": 90.14, - "STS17 (spa-Latn_eng-Latn)": 87.47, - "STS17 (spa-Latn)": 87.12, - "STSB (cmn-Hans)": 78.12, - "STSBenchmark": 85.64, - "STSBenchmarkMultilingualSTS (en)": 85.65, - "STSBenchmarkMultilingualSTS (fra-Latn)": 81.96, - "STSBenchmarkMultilingualSTS (pol-Latn)": 80.21, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 80.84, - "STSBenchmarkMultilingualSTS (nld-Latn)": 80.37, - "STSBenchmarkMultilingualSTS (spa-Latn)": 82.81, - "STSBenchmarkMultilingualSTS (por-Latn)": 80.98, - "STSBenchmarkMultilingualSTS (deu-Latn)": 82.08, - "STSBenchmarkMultilingualSTS (ita-Latn)": 81.69, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 79.73 + "Model": "bge-small-zh-v1.5", + "AFQMC": 33.42, + "ATEC": 43.01, + "BQ": 55.22, + "LCQMC": 72.19, + "PAWSX": 9.26, + "QBQTC": 35.29, + "STS22": 67.72, + "STSB": 76.73 } ] }, "Summarization": { - "cosine_spearman": [ + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "bm25": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "GritLM-7B", - "SummEval": 30.39 - }, + "Model": "bm25", + "BrightRetrieval (robotics)": 13.53, + "BrightRetrieval (pony)": 7.93, + "BrightRetrieval (leetcode)": 24.37, + "BrightRetrieval (earth_science)": 27.06, + "BrightRetrieval (stackoverflow)": 16.55, + "BrightRetrieval (economics)": 14.87, + "BrightRetrieval (theoremqa_questions)": 9.78, + "BrightRetrieval (theoremqa_theorems)": 4.75, + "BrightRetrieval (psychology)": 12.51, + "BrightRetrieval (sustainable_living)": 15.22, + "BrightRetrieval (biology)": 19.19, + "BrightRetrieval (aops)": 6.2 + } + ], + "recall_at_1": [ { - "Model": "GritLM-7B", - "SummEval": 30.39 + "Model": "bm25", + "BrightRetrieval (robotics)": 7.43, + "BrightRetrieval (pony)": 5.35, + "BrightRetrieval (biology)": 10.68, + "BrightRetrieval (stackoverflow)": 22.22, + "BrightRetrieval (earth_science)": 15.37, + "BrightRetrieval (psychology)": 8.42, + "BrightRetrieval (sustainable_living)": 10.68, + "BrightRetrieval (economics)": 10.68 } ] }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, "MultilabelClassification": { - "accuracy": [ + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [ { - "Model": "GritLM-7B", - "CEDRClassification (rus-Cyrl)": 42.68, - "SensitiveTopicsClassification (rus-Cyrl)": 28.52 + "Model": "bm25", + "Core17InstructionRetrieval": -1.06, + "News21InstructionRetrieval": -2.15, + "Robust04InstructionRetrieval": -3.06 } ] + } + }, + "bm25s": { + "BitextMining": { + "f1": [] }, - "InstructionRetrieval": { - "p-MRR": [ + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "GritLM-7B", - "Core17InstructionRetrieval": 6.7, - "News21InstructionRetrieval": 1.22, - "Robust04InstructionRetrieval": 2.44 + "Model": "bm25s", + "ArguAna": 49.28, + "CQADupstackRetrieval": 31.86, + "ClimateFEVER": 13.62, + "DBPedia": 29.91, + "FEVER": 48.09, + "FiQA2018": 25.14, + "HotpotQA": 56.91, + "MSMARCO": 21.89, + "NFCorpus": 32.08, + "NQ": 28.5, + "QuoraRetrieval": 80.42, + "SCIDOCS": 15.78, + "SciFact": 68.7, + "TRECCOVID": 62.31, + "Touche2020": 33.05 } ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] } }, - "all-MiniLM-L6-v2": { + "contriever": { "BitextMining": { - "f1": [ + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "all-MiniLM-L6-v2", - "BornholmBitextMining": 29.68, - "BornholmBitextMining (dan-Latn)": 29.68, - "Tatoeba (gle-Latn_eng-Latn)": 2.75, - "Tatoeba (ell-Grek_eng-Latn)": 0.1, - "Tatoeba (heb-Hebr_eng-Latn)": 0.22, - "Tatoeba (mon-Cyrl_eng-Latn)": 0.38, - "Tatoeba (lat-Latn_eng-Latn)": 5.04, - "Tatoeba (csb-Latn_eng-Latn)": 3.78, - "Tatoeba (dsb-Latn_eng-Latn)": 2.9, - "Tatoeba (tel-Telu_eng-Latn)": 0.46, - "Tatoeba (ara-Arab_eng-Latn)": 0.0, - "Tatoeba (lfn-Latn_eng-Latn)": 4.55, - "Tatoeba (bul-Cyrl_eng-Latn)": 0.21, - "Tatoeba (kur-Latn_eng-Latn)": 5.21, - "Tatoeba (fao-Latn_eng-Latn)": 5.33, - "Tatoeba (kat-Geor_eng-Latn)": 0.3, - "Tatoeba (hsb-Latn_eng-Latn)": 2.65, - "Tatoeba (tam-Taml_eng-Latn)": 0.33, - "Tatoeba (vie-Latn_eng-Latn)": 3.07, - "Tatoeba (pes-Arab_eng-Latn)": 0.0, - "Tatoeba (slk-Latn_eng-Latn)": 3.27, - "Tatoeba (bos-Latn_eng-Latn)": 5.58, - "Tatoeba (ukr-Cyrl_eng-Latn)": 0.3, - "Tatoeba (gsw-Latn_eng-Latn)": 11.33, - "Tatoeba (bre-Latn_eng-Latn)": 3.22, - "Tatoeba (uig-Arab_eng-Latn)": 0.2, - "Tatoeba (cbk-Latn_eng-Latn)": 7.04, - "Tatoeba (ile-Latn_eng-Latn)": 13.54, - "Tatoeba (lit-Latn_eng-Latn)": 0.92, - "Tatoeba (mar-Deva_eng-Latn)": 0.0, - "Tatoeba (tha-Thai_eng-Latn)": 0.3, - "Tatoeba (mhr-Cyrl_eng-Latn)": 0.0, - "Tatoeba (max-Deva_eng-Latn)": 6.93, - "Tatoeba (yid-Hebr_eng-Latn)": 0.14, - "Tatoeba (khm-Khmr_eng-Latn)": 0.42, - "Tatoeba (ina-Latn_eng-Latn)": 17.63, - "Tatoeba (ita-Latn_eng-Latn)": 9.9, - "Tatoeba (bel-Cyrl_eng-Latn)": 0.5, - "Tatoeba (srp-Cyrl_eng-Latn)": 1.28, - "Tatoeba (pol-Latn_eng-Latn)": 2.58, - "Tatoeba (slv-Latn_eng-Latn)": 3.25, - "Tatoeba (tzl-Latn_eng-Latn)": 4.58, - "Tatoeba (uzb-Latn_eng-Latn)": 2.34, - "Tatoeba (ido-Latn_eng-Latn)": 7.48, - "Tatoeba (rus-Cyrl_eng-Latn)": 0.07, - "Tatoeba (cha-Latn_eng-Latn)": 13.29, - "Tatoeba (wuu-Hans_eng-Latn)": 0.6, - "Tatoeba (urd-Arab_eng-Latn)": 0.1, - "Tatoeba (hye-Armn_eng-Latn)": 0.41, - "Tatoeba (ber-Tfng_eng-Latn)": 4.69, - "Tatoeba (por-Latn_eng-Latn)": 8.29, - "Tatoeba (nov-Latn_eng-Latn)": 13.97, - "Tatoeba (mal-Mlym_eng-Latn)": 0.15, - "Tatoeba (fra-Latn_eng-Latn)": 8.17, - "Tatoeba (hin-Deva_eng-Latn)": 0.0, - "Tatoeba (nds-Latn_eng-Latn)": 9.56, - "Tatoeba (tat-Cyrl_eng-Latn)": 0.44, - "Tatoeba (kab-Latn_eng-Latn)": 0.96, - "Tatoeba (jpn-Jpan_eng-Latn)": 0.97, - "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, - "Tatoeba (yue-Hant_eng-Latn)": 0.86, - "Tatoeba (dtp-Latn_eng-Latn)": 1.88, - "Tatoeba (xho-Latn_eng-Latn)": 4.01, - "Tatoeba (ind-Latn_eng-Latn)": 3.86, - "Tatoeba (glg-Latn_eng-Latn)": 9.31, - "Tatoeba (zsm-Latn_eng-Latn)": 4.24, - "Tatoeba (swh-Latn_eng-Latn)": 5.8, - "Tatoeba (ast-Latn_eng-Latn)": 6.84, - "Tatoeba (spa-Latn_eng-Latn)": 5.63, - "Tatoeba (cat-Latn_eng-Latn)": 6.93, - "Tatoeba (pms-Latn_eng-Latn)": 7.62, - "Tatoeba (pam-Latn_eng-Latn)": 3.54, - "Tatoeba (kaz-Cyrl_eng-Latn)": 0.42, - "Tatoeba (hrv-Latn_eng-Latn)": 3.83, - "Tatoeba (epo-Latn_eng-Latn)": 5.46, - "Tatoeba (orv-Cyrl_eng-Latn)": 0.0, - "Tatoeba (swg-Latn_eng-Latn)": 8.92, - "Tatoeba (kzj-Latn_eng-Latn)": 2.78, - "Tatoeba (ang-Latn_eng-Latn)": 15.64, - "Tatoeba (cmn-Hans_eng-Latn)": 1.92, - "Tatoeba (nob-Latn_eng-Latn)": 4.34, - "Tatoeba (lvs-Latn_eng-Latn)": 2.61, - "Tatoeba (dan-Latn_eng-Latn)": 7.84, - "Tatoeba (aze-Latn_eng-Latn)": 1.04, - "Tatoeba (tur-Latn_eng-Latn)": 3.59, - "Tatoeba (sqi-Latn_eng-Latn)": 3.58, - "Tatoeba (hun-Latn_eng-Latn)": 3.56, - "Tatoeba (awa-Deva_eng-Latn)": 0.51, - "Tatoeba (afr-Latn_eng-Latn)": 5.89, - "Tatoeba (tuk-Latn_eng-Latn)": 3.52, - "Tatoeba (est-Latn_eng-Latn)": 2.36, - "Tatoeba (fry-Latn_eng-Latn)": 11.22, - "Tatoeba (cor-Latn_eng-Latn)": 2.41, - "Tatoeba (ceb-Latn_eng-Latn)": 3.39, - "Tatoeba (eus-Latn_eng-Latn)": 5.54, - "Tatoeba (amh-Ethi_eng-Latn)": 0.25, - "Tatoeba (ron-Latn_eng-Latn)": 6.82, - "Tatoeba (oci-Latn_eng-Latn)": 6.55, - "Tatoeba (ben-Beng_eng-Latn)": 0.0, - "Tatoeba (arq-Arab_eng-Latn)": 0.11, - "Tatoeba (swe-Latn_eng-Latn)": 6.06, - "Tatoeba (nno-Latn_eng-Latn)": 5.38, - "Tatoeba (gla-Latn_eng-Latn)": 2.7, - "Tatoeba (kor-Hang_eng-Latn)": 0.45, - "Tatoeba (arz-Arab_eng-Latn)": 0.0, - "Tatoeba (fin-Latn_eng-Latn)": 2.79, - "Tatoeba (ces-Latn_eng-Latn)": 3.04, - "Tatoeba (deu-Latn_eng-Latn)": 7.89, - "Tatoeba (cym-Latn_eng-Latn)": 6.09, - "Tatoeba (war-Latn_eng-Latn)": 4.94, - "Tatoeba (isl-Latn_eng-Latn)": 2.37, - "Tatoeba (nld-Latn_eng-Latn)": 10.16, - "Tatoeba (tgl-Latn_eng-Latn)": 2.69, - "Tatoeba (jav-Latn_eng-Latn)": 3.37 + "Model": "contriever", + "ARCChallenge": 8.62, + "AlphaNLI": 31.77, + "HellaSwag": 17.73, + "PIQA": 24.64, + "Quail": 4.97, + "RARbCode": 9.28, + "RARbMath": 30.76, + "SIQA": 1.27, + "SpartQA": 10.94, + "TempReasonL1": 1.93, + "TempReasonL2Fact": 22.68, + "TempReasonL2Pure": 1.12, + "TempReasonL3Fact": 20.62, + "TempReasonL3Pure": 7.8, + "WinoGrande": 47.15 } ] }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "contriever-base-msmarco": { + "BitextMining": { + "f1": [] + }, "Classification": { "accuracy": [ { - "Model": "all-MiniLM-L6-v2", - "AllegroReviews (pol-Latn)": 24.64, - "AmazonCounterfactualClassification (en)": 63.64, - "AmazonCounterfactualClassification (en-ext)": 65.59, - "AmazonCounterfactualClassification (deu-Latn)": 57.82, - "AmazonCounterfactualClassification (jpn-Jpan)": 60.9, - "AmazonPolarityClassification": 64.26, - "AmazonReviewsClassification (en)": 30.85, - "AmazonReviewsClassification (deu-Latn)": 26.44, - "AmazonReviewsClassification (spa-Latn)": 27.35, - "AmazonReviewsClassification (fra-Latn)": 26.88, - "AmazonReviewsClassification (jpn-Jpan)": 23.78, - "AmazonReviewsClassification (cmn-Hans)": 23.67, - "AngryTweetsClassification": 42.49, - "AngryTweetsClassification (dan-Latn)": 42.48, - "Banking77Classification": 80.04, - "CBD (pol-Latn)": 50.9, - "DKHateClassification": 55.05, - "DanishPoliticalCommentsClassification": 26.96, - "DanishPoliticalCommentsClassification (dan-Latn)": 26.7, - "EmotionClassification": 40.83, - "GeoreviewClassification (rus-Cyrl)": 27.08, - "HeadlineClassification (rus-Cyrl)": 27.77, - "IFlyTek (cmn-Hans)": 16.09, - "ImdbClassification": 61.76, - "InappropriatenessClassification (rus-Cyrl)": 51.73, - "JDReview (cmn-Hans)": 59.98, - "KinopoiskClassification (rus-Cyrl)": 33.93, - "LccSentimentClassification": 38.47, - "LccSentimentClassification (dan-Latn)": 38.53, - "MTOPDomainClassification (en)": 91.68, - "MTOPDomainClassification (deu-Latn)": 70.47, - "MTOPDomainClassification (spa-Latn)": 72.99, - "MTOPDomainClassification (fra-Latn)": 75.1, - "MTOPDomainClassification (hin-Deva)": 40.74, - "MTOPDomainClassification (tha-Thai)": 15.66, - "MTOPIntentClassification (en)": 61.55, - "MTOPIntentClassification (deu-Latn)": 45.7, - "MTOPIntentClassification (spa-Latn)": 44.19, - "MTOPIntentClassification (fra-Latn)": 39.67, - "MTOPIntentClassification (hin-Deva)": 18.69, - "MTOPIntentClassification (tha-Thai)": 5.03, - "MasakhaNEWSClassification (fra)": 74.05, - "MasakhaNEWSClassification (amh-Ethi)": 33.03, - "MasakhaNEWSClassification (eng)": 77.11, - "MasakhaNEWSClassification (fra-Latn)": 68.84, - "MasakhaNEWSClassification (hau-Latn)": 50.49, - "MasakhaNEWSClassification (ibo-Latn)": 52.15, - "MasakhaNEWSClassification (lin-Latn)": 68.29, - "MasakhaNEWSClassification (lug-Latn)": 47.58, - "MasakhaNEWSClassification (orm-Ethi)": 50.68, - "MasakhaNEWSClassification (pcm-Latn)": 92.56, - "MasakhaNEWSClassification (run-Latn)": 54.81, - "MasakhaNEWSClassification (sna-Latn)": 65.58, - "MasakhaNEWSClassification (som-Latn)": 39.8, - "MasakhaNEWSClassification (swa-Latn)": 47.25, - "MasakhaNEWSClassification (tir-Ethi)": 28.97, - "MasakhaNEWSClassification (xho-Latn)": 54.14, - "MasakhaNEWSClassification (yor-Latn)": 55.01, - "MassiveIntentClassification (en)": 66.94, - "MassiveIntentClassification (da)": 40.99, - "MassiveIntentClassification (nb)": 39.34, - "MassiveIntentClassification (sv)": 38.1, - "MassiveIntentClassification (mon-Cyrl)": 20.35, - "MassiveIntentClassification (tam-Taml)": 11.31, - "MassiveIntentClassification (rus-Cyrl)": 27.58, - "MassiveIntentClassification (fin-Latn)": 38.37, - "MassiveIntentClassification (ell-Grek)": 24.19, - "MassiveIntentClassification (hin-Deva)": 17.7, - "MassiveIntentClassification (fra-Latn)": 42.55, - "MassiveIntentClassification (por-Latn)": 43.76, - "MassiveIntentClassification (swe-Latn)": 38.09, - "MassiveIntentClassification (mal-Mlym)": 2.89, - "MassiveIntentClassification (cmo-Hant)": 22.56, - "MassiveIntentClassification (vie-Latn)": 37.09, - "MassiveIntentClassification (isl-Latn)": 29.95, - "MassiveIntentClassification (ind-Latn)": 39.02, - "MassiveIntentClassification (jav-Latn)": 35.91, - "MassiveIntentClassification (kat-Geor)": 9.07, - "MassiveIntentClassification (tur-Latn)": 33.76, - "MassiveIntentClassification (heb-Hebr)": 22.48, - "MassiveIntentClassification (lav-Latn)": 36.97, - "MassiveIntentClassification (ben-Beng)": 13.1, - "MassiveIntentClassification (afr-Latn)": 37.45, - "MassiveIntentClassification (swa-Latn)": 34.98, - "MassiveIntentClassification (tel-Telu)": 2.46, - "MassiveIntentClassification (sqi-Latn)": 40.7, - "MassiveIntentClassification (fas-Arab)": 19.1, - "MassiveIntentClassification (aze-Latn)": 30.63, - "MassiveIntentClassification (ara-Arab)": 19.05, - "MassiveIntentClassification (deu-Latn)": 43.44, - "MassiveIntentClassification (tha-Thai)": 11.26, - "MassiveIntentClassification (cym-Latn)": 34.54, - "MassiveIntentClassification (dan-Latn)": 41.0, - "MassiveIntentClassification (kor-Kore)": 16.05, - "MassiveIntentClassification (kan-Knda)": 3.14, - "MassiveIntentClassification (mya-Mymr)": 4.24, - "MassiveIntentClassification (nob-Latn)": 39.36, - "MassiveIntentClassification (cmo-Hans)": 24.4, - "MassiveIntentClassification (ron-Latn)": 40.54, - "MassiveIntentClassification (jpn-Jpan)": 31.87, - "MassiveIntentClassification (nld-Latn)": 40.2, - "MassiveIntentClassification (ita-Latn)": 41.59, - "MassiveIntentClassification (urd-Arab)": 14.42, - "MassiveIntentClassification (amh-Ethi)": 2.62, - "MassiveIntentClassification (hun-Latn)": 35.69, - "MassiveIntentClassification (msa-Latn)": 35.07, - "MassiveIntentClassification (pol-Latn)": 36.07, - "MassiveIntentClassification (hye-Armn)": 7.62, - "MassiveIntentClassification (tgl-Latn)": 37.92, - "MassiveIntentClassification (slv-Latn)": 36.7, - "MassiveIntentClassification (khm-Khmr)": 4.91, - "MassiveIntentClassification (spa-Latn)": 39.88, - "MassiveScenarioClassification (en)": 73.81, - "MassiveScenarioClassification (da)": 47.01, - "MassiveScenarioClassification (nb)": 44.67, - "MassiveScenarioClassification (sv)": 42.93, - "MassiveScenarioClassification (kat-Geor)": 14.92, - "MassiveScenarioClassification (tur-Latn)": 38.85, - "MassiveScenarioClassification (spa-Latn)": 49.0, - "MassiveScenarioClassification (ita-Latn)": 49.8, - "MassiveScenarioClassification (tam-Taml)": 17.37, - "MassiveScenarioClassification (slv-Latn)": 41.9, - "MassiveScenarioClassification (ara-Arab)": 25.99, - "MassiveScenarioClassification (heb-Hebr)": 24.01, - "MassiveScenarioClassification (isl-Latn)": 36.12, - "MassiveScenarioClassification (cym-Latn)": 39.0, - "MassiveScenarioClassification (nld-Latn)": 48.43, - "MassiveScenarioClassification (jpn-Jpan)": 37.3, - "MassiveScenarioClassification (mal-Mlym)": 7.67, - "MassiveScenarioClassification (dan-Latn)": 47.02, - "MassiveScenarioClassification (kan-Knda)": 7.85, - "MassiveScenarioClassification (mya-Mymr)": 10.61, - "MassiveScenarioClassification (msa-Latn)": 43.67, - "MassiveScenarioClassification (mon-Cyrl)": 25.47, - "MassiveScenarioClassification (ell-Grek)": 31.3, - "MassiveScenarioClassification (hin-Deva)": 23.71, - "MassiveScenarioClassification (hye-Armn)": 13.03, - "MassiveScenarioClassification (ron-Latn)": 48.23, - "MassiveScenarioClassification (hun-Latn)": 41.61, - "MassiveScenarioClassification (ind-Latn)": 43.46, - "MassiveScenarioClassification (jav-Latn)": 43.59, - "MassiveScenarioClassification (khm-Khmr)": 9.25, - "MassiveScenarioClassification (cmo-Hant)": 31.18, - "MassiveScenarioClassification (vie-Latn)": 40.47, - "MassiveScenarioClassification (afr-Latn)": 43.87, - "MassiveScenarioClassification (fra-Latn)": 51.14, - "MassiveScenarioClassification (amh-Ethi)": 7.57, - "MassiveScenarioClassification (fas-Arab)": 23.97, - "MassiveScenarioClassification (kor-Kore)": 20.3, - "MassiveScenarioClassification (tgl-Latn)": 45.69, - "MassiveScenarioClassification (lav-Latn)": 40.43, - "MassiveScenarioClassification (sqi-Latn)": 47.21, - "MassiveScenarioClassification (tha-Thai)": 19.5, - "MassiveScenarioClassification (aze-Latn)": 35.59, - "MassiveScenarioClassification (swa-Latn)": 43.32, - "MassiveScenarioClassification (pol-Latn)": 43.82, - "MassiveScenarioClassification (tel-Telu)": 7.95, - "MassiveScenarioClassification (rus-Cyrl)": 30.46, - "MassiveScenarioClassification (ben-Beng)": 20.56, - "MassiveScenarioClassification (fin-Latn)": 42.38, - "MassiveScenarioClassification (por-Latn)": 50.72, - "MassiveScenarioClassification (swe-Latn)": 42.95, - "MassiveScenarioClassification (nob-Latn)": 44.67, - "MassiveScenarioClassification (cmo-Hans)": 33.65, - "MassiveScenarioClassification (urd-Arab)": 23.73, - "MassiveScenarioClassification (deu-Latn)": 51.47, - "MultilingualSentiment (cmn-Hans)": 41.28, - "NoRecClassification": 40.02, - "NoRecClassification (nob-Latn)": 37.93, - "NordicLangClassification": 54.71, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 54.7, - "NorwegianParliament": 54.8, - "OnlineShopping (cmn-Hans)": 57.74, - "PAC (pol-Latn)": 59.78, - "PolEmo2.0-IN (pol-Latn)": 40.29, - "PolEmo2.0-OUT (pol-Latn)": 25.0, - "RuReviewsClassification (rus-Cyrl)": 41.79, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 10.08, - "RuSciBenchOECDClassification (rus-Cyrl)": 8.3, - "ScalaDaClassification": 50.03, - "ScalaNbClassification": 50.17, - "TNews (cmn-Hans)": 20.12, - "ToxicConversationsClassification": 62.09, - "TweetSentimentExtractionClassification": 54.04, - "Waimai (cmn-Hans)": 62.72 + "Model": "contriever-base-msmarco", + "AmazonCounterfactualClassification": 72.19, + "AmazonPolarityClassification": 68.63, + "AmazonReviewsClassification": 37.42, + "Banking77Classification": 80.02, + "EmotionClassification": 44.77, + "ImdbClassification": 67.04, + "MTOPDomainClassification": 93.18, + "MTOPIntentClassification": 69.31, + "MassiveIntentClassification": 67.78, + "MassiveScenarioClassification": 76.0, + "ToxicConversationsClassification": 67.77, + "TweetSentimentExtractionClassification": 56.1 } ] }, "Clustering": { "v_measure": [ { - "Model": "all-MiniLM-L6-v2", - "AlloProfClusteringP2P": 51.83, - "AlloProfClusteringS2S": 32.07, - "ArxivClusteringP2P": 46.55, - "ArxivClusteringS2S": 37.86, - "BiorxivClusteringP2P": 38.37, - "BiorxivClusteringS2S": 32.88, - "GeoreviewClusteringP2P (rus-Cyrl)": 20.25, - "HALClusteringS2S": 18.84, - "MLSUMClusteringP2P": 36.74, - "MLSUMClusteringP2P (rus-Cyrl)": 23.91, - "MLSUMClusteringS2S": 28.12, - "MLSUMClusteringS2S (rus-Cyrl)": 19.07, - "MasakhaNEWSClusteringP2P (fra)": 34.92, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 43.85, - "MasakhaNEWSClusteringP2P (eng)": 48.88, - "MasakhaNEWSClusteringP2P (fra-Latn)": 34.92, - "MasakhaNEWSClusteringP2P (hau-Latn)": 24.77, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 45.94, - "MasakhaNEWSClusteringP2P (lin-Latn)": 69.56, - "MasakhaNEWSClusteringP2P (lug-Latn)": 49.4, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 25.34, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 85.57, - "MasakhaNEWSClusteringP2P (run-Latn)": 50.75, - "MasakhaNEWSClusteringP2P (sna-Latn)": 41.68, - "MasakhaNEWSClusteringP2P (som-Latn)": 29.02, - "MasakhaNEWSClusteringP2P (swa-Latn)": 21.87, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 42.93, - "MasakhaNEWSClusteringP2P (xho-Latn)": 28.58, - "MasakhaNEWSClusteringP2P (yor-Latn)": 31.45, - "MasakhaNEWSClusteringS2S (fra)": 40.58, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 45.44, - "MasakhaNEWSClusteringS2S (eng)": 41.09, - "MasakhaNEWSClusteringS2S (fra-Latn)": 40.58, - "MasakhaNEWSClusteringS2S (hau-Latn)": 15.42, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 37.02, - "MasakhaNEWSClusteringS2S (lin-Latn)": 65.14, - "MasakhaNEWSClusteringS2S (lug-Latn)": 44.21, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 24.79, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 61.48, - "MasakhaNEWSClusteringS2S (run-Latn)": 51.25, - "MasakhaNEWSClusteringS2S (sna-Latn)": 42.74, - "MasakhaNEWSClusteringS2S (som-Latn)": 30.08, - "MasakhaNEWSClusteringS2S (swa-Latn)": 9.55, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 46.04, - "MasakhaNEWSClusteringS2S (xho-Latn)": 27.08, - "MasakhaNEWSClusteringS2S (yor-Latn)": 31.04, - "MedrxivClusteringP2P": 34.39, - "MedrxivClusteringS2S": 31.86, - "RedditClustering": 50.7, - "RedditClusteringP2P": 54.8, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 10.21, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 9.44, - "StackExchangeClustering": 53.14, - "StackExchangeClusteringP2P": 34.26, - "TwentyNewsgroupsClustering": 46.49 + "Model": "contriever-base-msmarco", + "ArxivClusteringP2P": 42.61, + "ArxivClusteringS2S": 32.32, + "BiorxivClusteringP2P": 34.97, + "BiorxivClusteringS2S": 29.08, + "MedrxivClusteringP2P": 31.19, + "MedrxivClusteringS2S": 27.27, + "RedditClustering": 54.89, + "RedditClusteringP2P": 57.58, + "StackExchangeClustering": 63.15, + "StackExchangeClusteringP2P": 32.25, + "TwentyNewsgroupsClustering": 46.82 } ] }, "PairClassification": { "max_ap": [ { - "Model": "all-MiniLM-L6-v2", - "CDSC-E (pol-Latn)": 47.27, - "OpusparcusPC (deu-Latn)": 89.91, - "OpusparcusPC (en)": 97.46, - "OpusparcusPC (fin-Latn)": 85.44, - "OpusparcusPC (fra-Latn)": 86.53, - "OpusparcusPC (rus-Cyrl)": 79.28, - "OpusparcusPC (swe-Latn)": 83.78, - "PSC (pol-Latn)": 81.87, - "PawsXPairClassification (deu-Latn)": 51.22, - "PawsXPairClassification (en)": 59.1, - "PawsXPairClassification (spa-Latn)": 52.21, - "PawsXPairClassification (fra-Latn)": 55.41, - "PawsXPairClassification (jpn-Hira)": 48.97, - "PawsXPairClassification (kor-Hang)": 50.53, - "PawsXPairClassification (cmn-Hans)": 53.11, - "SICK-E-PL (pol-Latn)": 47.32, - "SprintDuplicateQuestions": 94.55, - "TERRa (rus-Cyrl)": 45.03, - "TwitterSemEval2015": 67.86, - "TwitterURLCorpus": 84.7 - }, - { - "Model": "all-MiniLM-L6-v2", - "CDSC-E (pol-Latn)": 47.27, - "OpusparcusPC (fr)": 86.53, - "OpusparcusPC (deu-Latn)": 89.91, - "OpusparcusPC (en)": 97.46, - "OpusparcusPC (fin-Latn)": 85.44, - "OpusparcusPC (fra-Latn)": 86.53, - "OpusparcusPC (rus-Cyrl)": 79.28, - "OpusparcusPC (swe-Latn)": 83.78, - "PSC (pol-Latn)": 81.87, - "PawsXPairClassification (fr)": 55.51, - "PawsXPairClassification (deu-Latn)": 51.55, - "PawsXPairClassification (en)": 59.1, - "PawsXPairClassification (spa-Latn)": 52.23, - "PawsXPairClassification (fra-Latn)": 55.52, - "PawsXPairClassification (jpn-Hira)": 49.19, - "PawsXPairClassification (kor-Hang)": 50.59, - "PawsXPairClassification (cmn-Hans)": 53.18, - "SICK-E-PL (pol-Latn)": 47.39, - "SprintDuplicateQuestions": 94.55, - "TERRa (rus-Cyrl)": 45.03, - "TwitterSemEval2015": 67.86, - "TwitterURLCorpus": 84.7 + "Model": "contriever-base-msmarco", + "SprintDuplicateQuestions": 95.55, + "TwitterSemEval2015": 66.85, + "TwitterURLCorpus": 85.21 }, { - "Model": "all-MiniLM-L6-v2", - "OpusparcusPC (fr)": 86.53, - "PawsXPairClassification (fr)": 55.4, - "SprintDuplicateQuestions": 94.55, - "TwitterSemEval2015": 67.86, - "TwitterURLCorpus": 84.7 + "Model": "contriever-base-msmarco", + "SprintDuplicateQuestions": 95.55, + "TwitterSemEval2015": 66.85, + "TwitterURLCorpus": 85.21 } ] }, "Reranking": { "map": [ { - "Model": "all-MiniLM-L6-v2", - "AlloprofReranking": 31.69, - "AlloprofReranking (fra-Latn)": 62.62, - "AskUbuntuDupQuestions": 63.48, - "MMarcoReranking (cmn-Hans)": 4.74, - "MindSmallReranking": 30.8, - "RuBQReranking (rus-Cyrl)": 27.05, - "SciDocsRR": 87.12, - "StackOverflowDupQuestions": 50.77, - "SyntecReranking": 59.57, - "SyntecReranking (fra-Latn)": 67.31, - "T2Reranking (cmn-Hans)": 56.26 + "Model": "contriever-base-msmarco", + "AskUbuntuDupQuestions": 56.69, + "MindSmallReranking": 31.58, + "SciDocsRR": 76.51, + "StackOverflowDupQuestions": 47.78 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "all-MiniLM-L6-v2", - "AILACasedocs": 19.72, - "AILAStatutes": 20.52, - "ARCChallenge": 9.48, - "AlloprofRetrieval": 28.41, - "AlloprofRetrieval (fra-Latn)": 28.41, - "AlphaNLI": 28.19, - "AppsRetrieval (eng-Latn_python-Code)": 6.6, - "ArguAna": 50.17, - "ArguAna-PL (pol-Latn)": 11.5, - "BSARDRetrieval": 0.0, - "BSARDRetrieval (fra-Latn)": 4.8, - "CQADupstackRetrieval": 41.32, - "ClimateFEVER": 20.27, - "CmedqaRetrieval (cmn-Hans)": 2.03, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 35.62, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 65.99, - "CodeSearchNetCCRetrieval (python-Code)": 71.29, - "CodeSearchNetCCRetrieval (javascript-Code)": 66.75, - "CodeSearchNetCCRetrieval (go-Code)": 60.0, - "CodeSearchNetCCRetrieval (ruby-Code)": 69.32, - "CodeSearchNetCCRetrieval (java-Code)": 66.32, - "CodeSearchNetCCRetrieval (php-Code)": 58.42, - "CodeSearchNetRetrieval (python-Code)": 79.28, - "CodeSearchNetRetrieval (javascript-Code)": 63.62, - "CodeSearchNetRetrieval (go-Code)": 79.84, - "CodeSearchNetRetrieval (ruby-Code)": 72.84, - "CodeSearchNetRetrieval (java-Code)": 51.65, - "CodeSearchNetRetrieval (php-Code)": 70.82, - "CodeTransOceanContest (python-Code_c++-Code)": 61.78, - "CodeTransOceanDL": 27.94, - "CosQA (eng-Latn_python-Code)": 33.02, - "CovidRetrieval (cmn-Hans)": 0.8, - "DBPedia": 32.33, - "DuRetrieval (cmn-Hans)": 3.03, - "EcomRetrieval (cmn-Hans)": 3.7, - "FEVER": 51.93, - "FiQA-PL (pol-Latn)": 2.29, - "FiQA2018": 36.87, - "GerDaLIRSmall (deu-Latn)": 2.41, - "HellaSwag": 24.21, - "HotpotQA": 46.51, - "LEMBNarrativeQARetrieval": 18.27, - "LEMBQMSumRetrieval": 16.32, - "LEMBSummScreenFDRetrieval": 54.8, - "LEMBWikimQARetrieval": 46.23, - "LeCaRDv2 (zho-Hans)": 17.5, - "LegalBenchConsumerContractsQA": 65.6, - "LegalBenchCorporateLobbying": 86.41, - "LegalQuAD (deu-Latn)": 11.81, - "LegalSummarization": 59.0, - "MIRACLRetrieval (rus-Cyrl)": 0.39, - "MMarcoRetrieval (cmn-Hans)": 6.21, - "MSMARCO": 36.54, - "MedicalRetrieval (cmn-Hans)": 1.76, - "MintakaRetrieval (fr)": 9.19, - "MintakaRetrieval (ara-Arab)": 2.22, - "MintakaRetrieval (deu-Latn)": 15.43, - "MintakaRetrieval (spa-Latn)": 7.72, - "MintakaRetrieval (fra-Latn)": 9.19, - "MintakaRetrieval (hin-Deva)": 2.65, - "MintakaRetrieval (ita-Latn)": 8.48, - "MintakaRetrieval (jpn-Hira)": 6.72, - "MintakaRetrieval (por-Latn)": 9.76, - "NFCorpus": 31.59, - "NFCorpus-PL (pol-Latn)": 10.62, - "NQ": 43.87, - "PIQA": 25.28, - "Quail": 3.92, - "QuoraRetrieval": 87.56, - "RARbCode": 44.27, - "RARbMath": 68.19, - "RiaNewsRetrieval (rus-Cyrl)": 0.67, - "RuBQRetrieval (rus-Cyrl)": 2.64, - "SCIDOCS": 21.64, - "SCIDOCS-PL (pol-Latn)": 3.75, - "SIQA": 1.56, - "SciFact": 64.51, - "SciFact-PL (pol-Latn)": 16.14, - "SpartQA": 1.65, - "StackOverflowQA": 83.96, - "SyntecRetrieval": 60.15, - "SyntecRetrieval (fra-Latn)": 60.15, - "SyntheticText2SQL (eng-Latn_sql-Code)": 44.35, - "T2Retrieval (cmn-Hans)": 1.6, - "TRECCOVID": 47.23, - "TRECCOVID-PL (pol-Latn)": 8.66, - "TempReasonL1": 1.53, - "TempReasonL2Fact": 17.65, - "TempReasonL2Pure": 0.46, - "TempReasonL3Fact": 14.16, - "TempReasonL3Pure": 6.33, - "Touche2020": 16.9, - "VideoRetrieval (cmn-Hans)": 9.79, - "WinoGrande": 47.35, - "XPQARetrieval (fr)": 51.79, - "XPQARetrieval (ara-Arab_ara-Arab)": 8.05, - "XPQARetrieval (eng-Latn_ara-Arab)": 1.9, - "XPQARetrieval (ara-Arab_eng-Latn)": 6.87, - "XPQARetrieval (deu-Latn_deu-Latn)": 53.25, - "XPQARetrieval (eng-Latn_deu-Latn)": 10.99, - "XPQARetrieval (deu-Latn_eng-Latn)": 27.59, - "XPQARetrieval (spa-Latn_spa-Latn)": 38.87, - "XPQARetrieval (eng-Latn_spa-Latn)": 5.46, - "XPQARetrieval (spa-Latn_eng-Latn)": 22.2, - "XPQARetrieval (fra-Latn_fra-Latn)": 51.79, - "XPQARetrieval (eng-Latn_fra-Latn)": 8.57, - "XPQARetrieval (fra-Latn_eng-Latn)": 31.36, - "XPQARetrieval (hin-Deva_hin-Deva)": 35.28, - "XPQARetrieval (eng-Latn_hin-Deva)": 6.28, - "XPQARetrieval (hin-Deva_eng-Latn)": 6.0, - "XPQARetrieval (ita-Latn_ita-Latn)": 54.57, - "XPQARetrieval (eng-Latn_ita-Latn)": 6.79, - "XPQARetrieval (ita-Latn_eng-Latn)": 24.13, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 39.23, - "XPQARetrieval (eng-Latn_jpn-Hira)": 4.1, - "XPQARetrieval (jpn-Hira_eng-Latn)": 13.05, - "XPQARetrieval (kor-Hang_kor-Hang)": 10.2, - "XPQARetrieval (eng-Latn_kor-Hang)": 5.72, - "XPQARetrieval (kor-Hang_eng-Latn)": 6.37, - "XPQARetrieval (pol-Latn_pol-Latn)": 22.33, - "XPQARetrieval (eng-Latn_pol-Latn)": 7.58, - "XPQARetrieval (pol-Latn_eng-Latn)": 14.43, - "XPQARetrieval (por-Latn_por-Latn)": 31.93, - "XPQARetrieval (eng-Latn_por-Latn)": 5.9, - "XPQARetrieval (por-Latn_eng-Latn)": 20.74, - "XPQARetrieval (tam-Taml_tam-Taml)": 7.39, - "XPQARetrieval (eng-Latn_tam-Taml)": 3.42, - "XPQARetrieval (tam-Taml_eng-Latn)": 2.91, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 19.41, - "XPQARetrieval (eng-Latn_cmn-Hans)": 5.05, - "XPQARetrieval (cmn-Hans_eng-Latn)": 8.77 - }, - { - "Model": "all-MiniLM-L6-v2", - "LEMBNeedleRetrieval": 20.0, - "LEMBPasskeyRetrieval": 23.25 + "Model": "contriever-base-msmarco", + "ArguAna": 48.32, + "CQADupstackRetrieval": 33.67, + "ClimateFEVER": 24.79, + "DBPedia": 38.1, + "FEVER": 59.29, + "FiQA2018": 27.42, + "HotpotQA": 56.81, + "MSMARCO": 36.77, + "NFCorpus": 31.32, + "NQ": 41.83, + "QuoraRetrieval": 86.72, + "SCIDOCS": 17.12, + "SciFact": 65.51, + "TRECCOVID": 44.77, + "Touche2020": 15.79 } ] }, "STS": { "cosine_spearman": [ { - "Model": "all-MiniLM-L6-v2", - "AFQMC (cmn-Hans)": 8.59, - "ATEC (cmn-Hans)": 13.52, - "BIOSSES": 81.64, - "BQ (cmn-Hans)": 23.84, - "CDSC-R (pol-Latn)": 79.45, - "LCQMC (cmn-Hans)": 23.85, - "PAWSX (cmn-Hans)": 7.21, - "RUParaPhraserSTS (rus-Cyrl)": 43.93, - "RuSTSBenchmarkSTS (rus-Cyrl)": 55.56, - "SICK-R": 77.58, - "SICK-R-PL (pol-Latn)": 52.43, - "SICKFr (fra-Latn)": 62.48, - "STS12": 72.37, - "STS13": 80.6, - "STS14": 75.59, - "STS15": 85.39, - "STS16": 78.99, - "STS17 (spa-Latn)": 76.12, - "STS17 (eng-Latn_deu-Latn)": 35.82, - "STS17 (ita-Latn_eng-Latn)": 24.45, - "STS17 (ara-Arab)": 50.89, - "STS17 (fra-Latn_eng-Latn)": 37.09, - "STS17 (eng-Latn_tur-Latn)": 4.5, - "STS17 (en-en)": 87.59, - "STS17 (eng-Latn_ara-Arab)": -4.28, - "STS17 (nld-Latn_eng-Latn)": 29.0, - "STS17 (kor-Hang)": 43.39, - "STS17 (spa-Latn_eng-Latn)": 16.31, - "STS22 (ara-Arab)": 22.64, - "STS22 (rus-Cyrl)": 14.72, - "STS22 (en)": 67.71, - "STS22 (tur-Latn)": 33.69, - "STS22 (pol-Latn)": 26.77, - "STS22 (spa-Latn_eng-Latn)": 53.42, - "STS22 (cmn-Hans)": 44.93, - "STS22 (spa-Latn)": 54.78, - "STS22 (deu-Latn_fra-Latn)": 30.07, - "STS22 (deu-Latn_pol-Latn)": -4.93, - "STS22 (fra-Latn)": 77.0, - "STS22 (deu-Latn)": 31.04, - "STS22 (pol-Latn_eng-Latn)": 32.8, - "STS22 (deu-Latn_eng-Latn)": 44.04, - "STS22 (cmn-Hans_eng-Latn)": 41.64, - "STS22 (spa-Latn_ita-Latn)": 44.27, - "STS22 (fra-Latn_pol-Latn)": 50.71, - "STS22 (ita-Latn)": 60.4, - "STSB (cmn-Hans)": 37.8, - "STSBenchmark": 82.03, - "STSBenchmarkMultilingualSTS (pol-Latn)": 56.42, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.74, - "STSBenchmarkMultilingualSTS (en)": 82.03, - "STSBenchmarkMultilingualSTS (nld-Latn)": 55.46, - "STSBenchmarkMultilingualSTS (ita-Latn)": 59.24, - "STSBenchmarkMultilingualSTS (por-Latn)": 61.56, - "STSBenchmarkMultilingualSTS (deu-Latn)": 62.4, - "STSBenchmarkMultilingualSTS (fra-Latn)": 64.93, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.55, - "STSBenchmarkMultilingualSTS (spa-Latn)": 61.62 - }, - { - "Model": "all-MiniLM-L6-v2", - "BIOSSES": 81.64, - "SICK-R": 77.58, - "SICKFr": 62.48, - "STS12": 72.37, - "STS13": 80.6, - "STS14": 75.59, - "STS15": 85.39, - "STS16": 78.99, - "STS17 (ar-ar)": 50.89, - "STS17 (en-ar)": -4.28, - "STS17 (en-de)": 35.82, - "STS17 (en-en)": 87.59, - "STS17 (en-tr)": 4.5, - "STS17 (es-en)": 16.31, - "STS17 (es-es)": 76.12, - "STS17 (fr-en)": 37.09, - "STS17 (it-en)": 24.45, - "STS17 (ko-ko)": 43.39, - "STS17 (nl-en)": 29.0, - "STS22 (ar)": 22.64, - "STS22 (de)": 31.04, - "STS22 (de-en)": 44.04, - "STS22 (de-fr)": 30.07, - "STS22 (de-pl)": 4.93, - "STS22 (en)": 67.21, - "STS22 (es)": 54.78, - "STS22 (es-en)": 53.42, - "STS22 (es-it)": 44.27, - "STS22 (fr)": 77.0, - "STS22 (fr-pl)": 50.71, - "STS22 (it)": 60.4, - "STS22 (pl)": 26.77, - "STS22 (pl-en)": 32.8, - "STS22 (ru)": 14.72, - "STS22 (tr)": 33.69, - "STS22 (zh)": 44.93, - "STS22 (zh-en)": 41.64, - "STSBenchmark": 82.03, - "STSBenchmarkMultilingualSTS (fr)": 64.93 + "Model": "contriever-base-msmarco", + "BIOSSES": 83.32, + "SICK-R": 70.2, + "STS12": 64.34, + "STS13": 80.03, + "STS14": 74.51, + "STS15": 83.3, + "STS16": 79.67, + "STS17": 86.32, + "STS22": 64.64, + "STSBenchmark": 78.81 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "all-MiniLM-L6-v2", - "SummEval": 30.81, - "SummEvalFr": 28.28 - }, - { - "Model": "all-MiniLM-L6-v2", - "SummEval": 30.81, - "SummEvalFr (fra-Latn)": 28.29 - }, - { - "Model": "all-MiniLM-L6-v2", - "SummEval": 30.81, - "SummEvalFr (fra-Latn)": 28.29 + "Model": "contriever-base-msmarco", + "SummEval": 30.36 } ] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "all-MiniLM-L6-v2", - "CEDRClassification (rus-Cyrl)": 32.72, - "SensitiveTopicsClassification (rus-Cyrl)": 17.82 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "all-MiniLM-L6-v2", - "Core17InstructionRetrieval": -0.35, - "News21InstructionRetrieval": -0.25, - "Robust04InstructionRetrieval": -7.93 + "Model": "contriever-base-msmarco", + "Core17InstructionRetrieval": -2.48, + "News21InstructionRetrieval": -2.83, + "Robust04InstructionRetrieval": -6.12 } ] } }, - "nomic-embed-text-v1.5-512": { + "contriever-instruct": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "nomic-embed-text-v1.5-512", - "AmazonCounterfactualClassification (en)": 74.27, - "AmazonPolarityClassification": 91.89, - "AmazonReviewsClassification (en)": 46.97, - "Banking77Classification": 84.15, - "EmotionClassification": 47.73, - "ImdbClassification": 85.47, - "MTOPDomainClassification (en)": 92.62, - "MTOPIntentClassification (en)": 74.27, - "MassiveIntentClassification (en)": 73.07, - "MassiveScenarioClassification (en)": 76.82, - "ToxicConversationsClassification": 71.25, - "TweetSentimentExtractionClassification": 60.4 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "nomic-embed-text-v1.5-512", - "ArxivClusteringP2P": 45.45, - "ArxivClusteringS2S": 36.19, - "BiorxivClusteringP2P": 38.41, - "BiorxivClusteringS2S": 32.28, - "MedrxivClusteringP2P": 34.47, - "MedrxivClusteringS2S": 31.43, - "RedditClustering": 55.9, - "RedditClusteringP2P": 60.58, - "StackExchangeClustering": 62.94, - "StackExchangeClusteringP2P": 33.81, - "TwentyNewsgroupsClustering": 49.36 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "nomic-embed-text-v1.5-512", - "SprintDuplicateQuestions": 92.91, - "TwitterSemEval2015": 74.3, - "TwitterURLCorpus": 86.57 - }, - { - "Model": "nomic-embed-text-v1.5-512", - "SprintDuplicateQuestions": 92.91, - "TwitterSemEval2015": 74.3, - "TwitterURLCorpus": 86.57 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "nomic-embed-text-v1.5-512", - "AskUbuntuDupQuestions": 61.6, - "MindSmallReranking": 30.34, - "SciDocsRR": 80.33, - "StackOverflowDupQuestions": 50.32 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "nomic-embed-text-v1.5-512", - "ArguAna": 47.45, - "CQADupstackRetrieval": 39.06, - "ClimateFEVER": 40.7, - "DBPedia": 42.96, - "FEVER": 85.7, - "FiQA2018": 36.92, - "HotpotQA": 71.48, - "MSMARCO": 42.29, - "NFCorpus": 33.31, - "NQ": 58.83, - "QuoraRetrieval": 87.87, - "SCIDOCS": 17.88, - "SciFact": 70.12, - "TRECCOVID": 82.12, - "Touche2020": 29.24 + "Model": "contriever-instruct", + "ARCChallenge": 7.63, + "AlphaNLI": 27.09, + "PIQA": 21.73, + "Quail": 4.92, + "RARbCode": 7.12, + "RARbMath": 21.83, + "SIQA": 0.88, + "SpartQA": 10.56, + "TempReasonL1": 1.8, + "TempReasonL2Fact": 22.03, + "TempReasonL2Pure": 0.94, + "TempReasonL3Fact": 20.82, + "TempReasonL3Pure": 7.15, + "WinoGrande": 26.3 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "nomic-embed-text-v1.5-512", - "BIOSSES": 83.3, - "SICK-R": 79.27, - "STS12": 78.3, - "STS13": 85.81, - "STS14": 81.38, - "STS15": 86.79, - "STS16": 84.56, - "STS17 (en-en)": 87.25, - "STS22 (en)": 65.24, - "STSBenchmark": 85.14 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "nomic-embed-text-v1.5-512", - "SummEval": 30.47 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -8456,7 +7901,7 @@ "p-MRR": [] } }, - "GritLM-7B-noinstruct": { + "cross-en-de-roberta-sentence-transformer": { "BitextMining": { "f1": [] }, @@ -8464,7 +7909,15 @@ "accuracy": [] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "cross-en-de-roberta-sentence-transformer", + "BlurbsClusteringP2P": 30.82, + "BlurbsClusteringS2S": 12.69, + "TenKGnadClusteringP2P": 23.5, + "TenKGnadClusteringS2S": 10.94 + } + ] }, "PairClassification": { "max_ap": [] @@ -8473,26 +7926,7 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "GritLM-7B-noinstruct", - "ARCChallenge": 16.57, - "AlphaNLI": 29.56, - "HellaSwag": 36.03, - "PIQA": 35.8, - "Quail": 8.68, - "RARbCode": 83.14, - "RARbMath": 83.01, - "SIQA": 5.73, - "SpartQA": 1.56, - "TempReasonL1": 2.57, - "TempReasonL2Fact": 48.25, - "TempReasonL2Pure": 8.98, - "TempReasonL3Fact": 34.11, - "TempReasonL3Pure": 12.44, - "WinoGrande": 52.12 - } - ] + "ndcg_at_10": [] }, "STS": { "cosine_spearman": [] @@ -8507,87 +7941,82 @@ "p-MRR": [] } }, - "LaBSE-ru-turbo": { + "deberta-v1-base": { "BitextMining": { "f1": [ { - "Model": "LaBSE-ru-turbo", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.22 + "Model": "deberta-v1-base", + "Tatoeba (rus-eng)": 13.21 } ] }, "Classification": { "accuracy": [ { - "Model": "LaBSE-ru-turbo", - "GeoreviewClassification (rus-Cyrl)": 46.04, - "HeadlineClassification (rus-Cyrl)": 69.98, - "InappropriatenessClassification (rus-Cyrl)": 61.39, - "KinopoiskClassification (rus-Cyrl)": 53.59, - "MassiveIntentClassification (rus-Cyrl)": 66.08, - "MassiveScenarioClassification (rus-Cyrl)": 71.13, - "RuReviewsClassification (rus-Cyrl)": 64.58, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.67, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.58 + "Model": "deberta-v1-base", + "GeoreviewClassification": 40.19, + "HeadlineClassification": 78.75, + "InappropriatenessClassification": 61.33, + "KinopoiskClassification": 48.78, + "MassiveIntentClassification (ru)": 61.32, + "MassiveScenarioClassification (ru)": 64.71, + "RuReviewsClassification": 55.66, + "RuSciBenchGRNTIClassification": 53.53, + "RuSciBenchOECDClassification": 41.34 } ] }, "Clustering": { "v_measure": [ { - "Model": "LaBSE-ru-turbo", - "GeoreviewClusteringP2P (rus-Cyrl)": 64.55, - "MLSUMClusteringP2P (rus-Cyrl)": 45.7, - "MLSUMClusteringS2S (rus-Cyrl)": 42.93, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 50.64, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.48 + "Model": "deberta-v1-base", + "GeoreviewClusteringP2P": 58.79, + "MLSUMClusteringP2P (ru)": 47.33, + "MLSUMClusteringS2S (ru)": 44.6, + "RuSciBenchGRNTIClusteringP2P": 36.66, + "RuSciBenchOECDClusteringP2P": 33.31 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LaBSE-ru-turbo", - "OpusparcusPC (rus-Cyrl)": 89.32, - "TERRa (rus-Cyrl)": 57.81 + "Model": "deberta-v1-base", + "OpusparcusPC (ru)": 83.31, + "TERRa": 53.78 }, { - "Model": "LaBSE-ru-turbo", - "OpusparcusPC (rus-Cyrl)": 89.32, - "TERRa (rus-Cyrl)": 57.81 + "Model": "deberta-v1-base", + "OpusparcusPC (ru)": 83.69, + "TERRa": 56.49 } ] }, "Reranking": { "map": [ { - "Model": "LaBSE-ru-turbo", - "MIRACLReranking (rus-Cyrl)": 57.44 - }, - { - "Model": "LaBSE-ru-turbo", - "RuBQReranking (rus-Cyrl)": 68.65 + "Model": "deberta-v1-base", + "RuBQReranking": 34.01 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LaBSE-ru-turbo", - "MIRACLRetrieval (rus-Cyrl)": 55.97, - "RiaNewsRetrieval (rus-Cyrl)": 69.36, - "RuBQRetrieval (rus-Cyrl)": 65.71 + "Model": "deberta-v1-base", + "RiaNewsRetrieval": 4.84, + "RuBQRetrieval": 10.15 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LaBSE-ru-turbo", - "RUParaPhraserSTS (rus-Cyrl)": 72.97, - "RuSTSBenchmarkSTS (rus-Cyrl)": 81.77, - "STS22 (rus-Cyrl)": 62.89, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81 + "Model": "deberta-v1-base", + "RUParaPhraserSTS": 54.03, + "RuSTSBenchmarkSTS": 58.47, + "STS22 (ru)": 47.67, + "STSBenchmarkMultilingualSTS (ru)": 58.45 } ] }, @@ -8597,9 +8026,9 @@ "MultilabelClassification": { "accuracy": [ { - "Model": "LaBSE-ru-turbo", - "CEDRClassification (rus-Cyrl)": 45.11, - "SensitiveTopicsClassification (rus-Cyrl)": 27.52 + "Model": "deberta-v1-base", + "CEDRClassification": 34.14, + "SensitiveTopicsClassification": 23.67 } ] }, @@ -8607,62 +8036,32 @@ "p-MRR": [] } }, - "contriever-instruct": { + "dfm-encoder-large-v1": { "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "f1": [ { - "Model": "contriever-instruct", - "ARCChallenge": 7.63, - "AlphaNLI": 27.09, - "PIQA": 21.73, - "Quail": 4.92, - "RARbCode": 7.12, - "RARbMath": 21.83, - "SIQA": 0.88, - "SpartQA": 10.56, - "TempReasonL1": 1.8, - "TempReasonL2Fact": 22.03, - "TempReasonL2Pure": 0.94, - "TempReasonL3Fact": 20.82, - "TempReasonL3Pure": 7.15, - "WinoGrande": 26.3 + "Model": "dfm-encoder-large-v1", + "BornholmBitextMining": 11.65 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "mistral-7b-instruct-v0.2": { - "BitextMining": { - "f1": [] - }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "dfm-encoder-large-v1", + "AngryTweetsClassification": 53.8, + "DKHateClassification": 60.09, + "DanishPoliticalCommentsClassification": 36.6, + "LccSentimentClassification": 57.33, + "MassiveIntentClassification": 49.74, + "MassiveScenarioClassification": 50.1, + "NoRecClassification": 48.3, + "NordicLangClassification": 77.68, + "NorwegianParliament": 58.78, + "ScalaDaClassification": 63.08, + "ScalaNbClassification": 58.95 + } + ] }, "Clustering": { "v_measure": [] @@ -8686,99 +8085,90 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "mistral-7b-instruct-v0.2", - "Core17InstructionRetrieval": 13.03, - "News21InstructionRetrieval": 4.81, - "Robust04InstructionRetrieval": 12.61 - } - ] + "p-MRR": [] } }, - "text2vec-base-chinese": { + "distilrubert-small-cased-conversational": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "distilrubert-small-cased-conversational", + "Tatoeba (rus-eng)": 24.16 + } + ] }, "Classification": { "accuracy": [ { - "Model": "text2vec-base-chinese", - "AmazonReviewsClassification (zh)": 34.12, - "IFlyTek": 42.05, - "JDReview": 82.14, - "MassiveIntentClassification (zh-CN)": 63.98, - "MassiveScenarioClassification (zh-CN)": 70.52, - "MultilingualSentiment": 60.98, - "OnlineShopping": 85.69, - "TNews": 43.01, - "Waimai": 77.22 + "Model": "distilrubert-small-cased-conversational", + "GeoreviewClassification": 38.95, + "HeadlineClassification": 75.59, + "InappropriatenessClassification": 60.68, + "KinopoiskClassification": 49.67, + "MassiveIntentClassification (ru)": 63.12, + "MassiveScenarioClassification (ru)": 68.08, + "RuReviewsClassification": 54.05, + "RuSciBenchGRNTIClassification": 48.53, + "RuSciBenchOECDClassification": 37.65 } ] }, "Clustering": { "v_measure": [ { - "Model": "text2vec-base-chinese", - "CLSClusteringP2P": 35.27, - "CLSClusteringS2S": 32.42, - "ThuNewsClusteringP2P": 42.92, - "ThuNewsClusteringS2S": 40.01 + "Model": "distilrubert-small-cased-conversational", + "GeoreviewClusteringP2P": 43.26, + "MLSUMClusteringP2P (ru)": 50.08, + "MLSUMClusteringS2S (ru)": 51.12, + "RuSciBenchGRNTIClusteringP2P": 37.84, + "RuSciBenchOECDClusteringP2P": 34.12 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text2vec-base-chinese", - "Cmnli": 73.87, - "Ocnli": 60.95 + "Model": "distilrubert-small-cased-conversational", + "OpusparcusPC (ru)": 84.35, + "TERRa": 52.48 }, { - "Model": "text2vec-base-chinese", - "Cmnli": 73.9, - "Ocnli": 61.26 + "Model": "distilrubert-small-cased-conversational", + "OpusparcusPC (ru)": 84.35, + "TERRa": 53.02 } ] }, "Reranking": { "map": [ { - "Model": "text2vec-base-chinese", - "CMedQAv1": 59.26, - "CMedQAv2": 59.82, - "MMarcoReranking": 12.76, - "T2Reranking": 65.95 + "Model": "distilrubert-small-cased-conversational", + "MIRACLReranking (ru)": 13.09 + }, + { + "Model": "distilrubert-small-cased-conversational", + "RuBQReranking": 42.58 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text2vec-base-chinese", - "CmedqaRetrieval": 15.91, - "CovidRetrieval": 44.81, - "DuRetrieval": 52.23, - "EcomRetrieval": 34.6, - "MMarcoRetrieval": 44.06, - "MedicalRetrieval": 27.56, - "T2Retrieval": 51.67, - "VideoRetrieval": 39.52 + "Model": "distilrubert-small-cased-conversational", + "MIRACLRetrieval (ru)": 2.39, + "RiaNewsRetrieval": 4.14, + "RuBQRetrieval": 10.6 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text2vec-base-chinese", - "AFQMC": 26.06, - "ATEC": 31.93, - "BQ": 42.67, - "LCQMC": 70.16, - "PAWSX": 17.21, - "QBQTC": 24.62, - "STS22 (zh)": 55.35, - "STSB": 79.3 + "Model": "distilrubert-small-cased-conversational", + "RUParaPhraserSTS": 55.01, + "RuSTSBenchmarkSTS": 61.72, + "STS22 (ru)": 51.87, + "STSBenchmarkMultilingualSTS (ru)": 61.6 } ] }, @@ -8786,27 +8176,173 @@ "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "distilrubert-small-cased-conversational", + "CEDRClassification": 36.19, + "SensitiveTopicsClassification": 22.45 + } + ] }, "InstructionRetrieval": { "p-MRR": [] } }, - "text-search-ada-doc-001": { + "distiluse-base-multilingual-cased-v2": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "distiluse-base-multilingual-cased-v2", + "AllegroReviews": 28.03, + "AmazonCounterfactualClassification": 65.39, + "AmazonPolarityClassification": 68.0, + "AmazonReviewsClassification": 33.89, + "Banking77Classification": 71.48, + "CBD": 60.0, + "EmotionClassification": 40.04, + "ImdbClassification": 61.52, + "MTOPDomainClassification": 73.62, + "MTOPIntentClassification": 47.73, + "MasakhaNEWSClassification": 76.87, + "MassiveIntentClassification": 54.96, + "MassiveScenarioClassification": 62.89, + "PAC": 68.17, + "PolEmo2.0-IN": 48.84, + "PolEmo2.0-OUT": 30.0, + "ToxicConversationsClassification": 69.09, + "TweetSentimentExtractionClassification": 59.97 + } + ] }, "Clustering": { "v_measure": [ { - "Model": "text-search-ada-doc-001", - "TwentyNewsgroupsClustering": 32.92 + "Model": "distiluse-base-multilingual-cased-v2", + "8TagsClustering": 12.51, + "AlloProfClusteringP2P": 55.95, + "AlloProfClusteringS2S": 35.39, + "ArxivClusteringP2P": 33.59, + "HALClusteringS2S": 18.2, + "MLSUMClusteringP2P": 40.17, + "MLSUMClusteringS2S": 34.65, + "MasakhaNEWSClusteringP2P": 53.76, + "MasakhaNEWSClusteringS2S": 32.76 + } + ] + }, + "PairClassification": { + "max_ap": [ + { + "Model": "distiluse-base-multilingual-cased-v2", + "CDSC-E": 71.83, + "OpusparcusPC": 92.07, + "PPC": 86.83, + "PSC": 96.35, + "PawsXPairClassification": 51.08, + "SICK-E-PL": 62.05, + "SprintDuplicateQuestions": 87.15, + "TwitterSemEval2015": 61.67, + "TwitterURLCorpus": 84.02 + }, + { + "Model": "distiluse-base-multilingual-cased-v2", + "CDSC-E": 72.0, + "OpusparcusPC": 92.07, + "PPC": 86.83, + "PSC": 96.35, + "PawsXPairClassification": 51.16, + "SICK-E-PL": 62.41, + "SprintDuplicateQuestions": 88.43, + "TwitterSemEval2015": 62.46, + "TwitterURLCorpus": 84.02 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "distiluse-base-multilingual-cased-v2", + "AlloprofReranking": 51.77, + "AskUbuntuDupQuestions": 53.75, + "MindSmallReranking": 30.39, + "SciDocsRR": 69.22, + "StackOverflowDupQuestions": 41.92, + "SyntecReranking": 74.78 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "distiluse-base-multilingual-cased-v2", + "AlloprofRetrieval": 26.99, + "ArguAna-PL": 36.7, + "BSARDRetrieval": 0.0, + "DBPedia-PL": 12.36, + "FiQA-PL": 8.02, + "HotpotQA-PL": 20.83, + "MSMARCO-PL": 4.57, + "MintakaRetrieval": 22.55, + "NFCorpus-PL": 16.28, + "NQ-PL": 5.85, + "Quora-PL": 71.95, + "SCIDOCS-PL": 6.5, + "SciFact-PL": 33.03, + "SyntecRetrieval": 65.34, + "TRECCOVID-PL": 16.91, + "XPQARetrieval": 51.2 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "distiluse-base-multilingual-cased-v2", + "BIOSSES": 78.34, + "CDSC-R": 87.67, + "SICK-R": 75.25, + "SICK-R-PL": 65.53, + "SICKFr": 72.49, + "STS12": 72.96, + "STS13": 70.58, + "STS14": 70.29, + "STS15": 81.94, + "STS16": 76.8, + "STS17": 80.51, + "STS22": 61.75, + "STSBenchmark": 80.75, + "STSBenchmarkMultilingualSTS": 77.49 + } + ] + }, + "Summarization": { + "cosine_spearman": [ + { + "Model": "distiluse-base-multilingual-cased-v2", + "SummEvalFr": 28.12 } ] }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "dragon-plus": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, "PairClassification": { "max_ap": [] }, @@ -8814,7 +8350,26 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "dragon-plus", + "ARCChallenge": 8.91, + "AlphaNLI": 32.1, + "HellaSwag": 27.69, + "PIQA": 28.01, + "Quail": 4.09, + "RARbCode": 17.58, + "RARbMath": 45.09, + "SIQA": 2.0, + "SpartQA": 10.34, + "TempReasonL1": 1.82, + "TempReasonL2Fact": 17.45, + "TempReasonL2Pure": 0.55, + "TempReasonL3Fact": 15.71, + "TempReasonL3Pure": 7.97, + "WinoGrande": 67.18 + } + ] }, "STS": { "cosine_spearman": [] @@ -8829,1104 +8384,1362 @@ "p-MRR": [] } }, - "LaBSE": { + "dragon-plus-instruct": { "BitextMining": { - "f1": [ - { - "Model": "LaBSE", - "BUCC (de-en)": 99.35, - "BUCC (fr-en)": 98.72, - "BUCC (ru-en)": 97.78, - "BUCC (zh-en)": 99.16, - "BornholmBitextMining (dan-Latn)": 45.63, - "Tatoeba (afr-eng)": 96.18, - "Tatoeba (amh-eng)": 91.47, - "Tatoeba (ang-eng)": 59.28, - "Tatoeba (ara-eng)": 88.8, - "Tatoeba (arq-eng)": 42.69, - "Tatoeba (arz-eng)": 76.0, - "Tatoeba (ast-eng)": 90.68, - "Tatoeba (awa-eng)": 71.7, - "Tatoeba (aze-eng)": 94.93, - "Tatoeba (bel-eng)": 95.0, - "Tatoeba (ben-eng)": 88.55, - "Tatoeba (ber-eng)": 8.4, - "Tatoeba (bos-eng)": 94.92, - "Tatoeba (bre-eng)": 15.07, - "Tatoeba (bul-eng)": 94.58, - "Tatoeba (cat-eng)": 95.38, - "Tatoeba (cbk-eng)": 79.44, - "Tatoeba (ceb-eng)": 64.42, - "Tatoeba (ces-eng)": 96.68, - "Tatoeba (cha-eng)": 31.77, - "Tatoeba (cmn-eng)": 95.1, - "Tatoeba (cor-eng)": 10.11, - "Tatoeba (csb-eng)": 52.57, - "Tatoeba (cym-eng)": 92.0, - "Tatoeba (dan-eng)": 95.71, - "Tatoeba (deu-eng)": 99.2, - "Tatoeba (dsb-eng)": 64.81, - "Tatoeba (dtp-eng)": 10.85, - "Tatoeba (ell-eng)": 95.35, - "Tatoeba (epo-eng)": 98.2, - "Tatoeba (est-eng)": 96.55, - "Tatoeba (eus-eng)": 95.01, - "Tatoeba (fao-eng)": 87.4, - "Tatoeba (fin-eng)": 96.37, - "Tatoeba (fra-eng)": 94.86, - "Tatoeba (fry-eng)": 89.31, - "Tatoeba (gla-eng)": 85.66, - "Tatoeba (gle-eng)": 93.8, - "Tatoeba (glg-eng)": 96.82, - "Tatoeba (gsw-eng)": 46.5, - "Tatoeba (heb-eng)": 91.53, - "Tatoeba (hin-eng)": 96.87, - "Tatoeba (hrv-eng)": 96.95, - "Tatoeba (hsb-eng)": 67.11, - "Tatoeba (hun-eng)": 96.55, - "Tatoeba (hye-eng)": 94.09, - "Tatoeba (ido-eng)": 89.42, - "Tatoeba (ile-eng)": 85.58, - "Tatoeba (ina-eng)": 95.37, - "Tatoeba (ind-eng)": 93.66, - "Tatoeba (isl-eng)": 94.75, - "Tatoeba (ita-eng)": 92.72, - "Tatoeba (jav-eng)": 79.77, - "Tatoeba (jpn-eng)": 95.38, - "Tatoeba (kab-eng)": 4.31, - "Tatoeba (kat-eng)": 95.02, - "Tatoeba (kaz-eng)": 87.49, - "Tatoeba (khm-eng)": 78.37, - "Tatoeba (kor-eng)": 90.95, - "Tatoeba (kur-eng)": 83.59, - "Tatoeba (kzj-eng)": 11.33, - "Tatoeba (lat-eng)": 80.07, - "Tatoeba (lfn-eng)": 67.54, - "Tatoeba (lit-eng)": 96.47, - "Tatoeba (lvs-eng)": 95.88, - "Tatoeba (mal-eng)": 98.45, - "Tatoeba (mar-eng)": 92.65, - "Tatoeba (max-eng)": 63.26, - "Tatoeba (mhr-eng)": 15.74, - "Tatoeba (mkd-eng)": 93.6, - "Tatoeba (mon-eng)": 95.91, - "Tatoeba (nds-eng)": 79.42, - "Tatoeba (nld-eng)": 96.07, - "Tatoeba (nno-eng)": 94.48, - "Tatoeba (nob-eng)": 98.4, - "Tatoeba (nov-eng)": 74.38, - "Tatoeba (oci-eng)": 65.81, - "Tatoeba (orv-eng)": 38.93, - "Tatoeba (pam-eng)": 10.73, - "Tatoeba (pes-eng)": 94.7, - "Tatoeba (pms-eng)": 64.57, - "Tatoeba (pol-eng)": 97.22, - "Tatoeba (por-eng)": 94.14, - "Tatoeba (ron-eng)": 96.92, - "Tatoeba (rus-eng)": 93.75, - "Tatoeba (slk-eng)": 96.5, - "Tatoeba (slv-eng)": 96.03, - "Tatoeba (spa-eng)": 98.4, - "Tatoeba (sqi-eng)": 96.76, - "Tatoeba (srp-eng)": 94.43, - "Tatoeba (swe-eng)": 95.63, - "Tatoeba (swg-eng)": 59.36, - "Tatoeba (swh-eng)": 84.5, - "Tatoeba (tam-eng)": 89.0, - "Tatoeba (tat-eng)": 85.92, - "Tatoeba (tel-eng)": 97.86, - "Tatoeba (tgl-eng)": 96.02, - "Tatoeba (tha-eng)": 96.14, - "Tatoeba (tuk-eng)": 75.27, - "Tatoeba (tur-eng)": 98.0, - "Tatoeba (tzl-eng)": 58.88, - "Tatoeba (uig-eng)": 92.4, - "Tatoeba (ukr-eng)": 93.97, - "Tatoeba (urd-eng)": 93.22, - "Tatoeba (uzb-eng)": 84.23, - "Tatoeba (vie-eng)": 97.2, - "Tatoeba (war-eng)": 60.29, - "Tatoeba (wuu-eng)": 90.18, - "Tatoeba (xho-eng)": 91.55, - "Tatoeba (yid-eng)": 88.79, - "Tatoeba (yue-eng)": 89.58, - "Tatoeba (zsm-eng)": 95.62, - "Tatoeba (ber-Tfng_eng-Latn)": 8.4, - "Tatoeba (hye-Armn_eng-Latn)": 94.09, - "Tatoeba (ces-Latn_eng-Latn)": 96.68, - "Tatoeba (slk-Latn_eng-Latn)": 96.5, - "Tatoeba (cat-Latn_eng-Latn)": 95.38, - "Tatoeba (awa-Deva_eng-Latn)": 71.7, - "Tatoeba (lat-Latn_eng-Latn)": 80.07, - "Tatoeba (hsb-Latn_eng-Latn)": 67.11, - "Tatoeba (swh-Latn_eng-Latn)": 84.5, - "Tatoeba (ind-Latn_eng-Latn)": 93.66, - "Tatoeba (xho-Latn_eng-Latn)": 91.55, - "Tatoeba (nno-Latn_eng-Latn)": 94.48, - "Tatoeba (csb-Latn_eng-Latn)": 52.57, - "Tatoeba (kzj-Latn_eng-Latn)": 11.33, - "Tatoeba (isl-Latn_eng-Latn)": 94.75, - "Tatoeba (nld-Latn_eng-Latn)": 96.07, - "Tatoeba (ell-Grek_eng-Latn)": 95.35, - "Tatoeba (por-Latn_eng-Latn)": 94.14, - "Tatoeba (ita-Latn_eng-Latn)": 92.72, - "Tatoeba (vie-Latn_eng-Latn)": 97.2, - "Tatoeba (uzb-Latn_eng-Latn)": 84.23, - "Tatoeba (bul-Cyrl_eng-Latn)": 94.58, - "Tatoeba (tgl-Latn_eng-Latn)": 96.02, - "Tatoeba (gla-Latn_eng-Latn)": 85.66, - "Tatoeba (mkd-Cyrl_eng-Latn)": 93.6, - "Tatoeba (tel-Telu_eng-Latn)": 97.86, - "Tatoeba (kaz-Cyrl_eng-Latn)": 87.49, - "Tatoeba (zsm-Latn_eng-Latn)": 95.62, - "Tatoeba (hin-Deva_eng-Latn)": 96.87, - "Tatoeba (ile-Latn_eng-Latn)": 85.58, - "Tatoeba (jpn-Jpan_eng-Latn)": 95.38, - "Tatoeba (orv-Cyrl_eng-Latn)": 38.93, - "Tatoeba (kab-Latn_eng-Latn)": 4.31, - "Tatoeba (yue-Hant_eng-Latn)": 89.58, - "Tatoeba (bel-Cyrl_eng-Latn)": 95.0, - "Tatoeba (dtp-Latn_eng-Latn)": 10.85, - "Tatoeba (ron-Latn_eng-Latn)": 96.92, - "Tatoeba (arz-Arab_eng-Latn)": 76.0, - "Tatoeba (tzl-Latn_eng-Latn)": 58.88, - "Tatoeba (slv-Latn_eng-Latn)": 96.03, - "Tatoeba (jav-Latn_eng-Latn)": 79.77, - "Tatoeba (pol-Latn_eng-Latn)": 97.22, - "Tatoeba (heb-Hebr_eng-Latn)": 91.53, - "Tatoeba (ang-Latn_eng-Latn)": 59.28, - "Tatoeba (ara-Arab_eng-Latn)": 88.8, - "Tatoeba (tuk-Latn_eng-Latn)": 75.27, - "Tatoeba (afr-Latn_eng-Latn)": 96.18, - "Tatoeba (kor-Hang_eng-Latn)": 90.95, - "Tatoeba (mhr-Cyrl_eng-Latn)": 15.74, - "Tatoeba (fry-Latn_eng-Latn)": 89.31, - "Tatoeba (urd-Arab_eng-Latn)": 93.22, - "Tatoeba (srp-Cyrl_eng-Latn)": 94.43, - "Tatoeba (cbk-Latn_eng-Latn)": 79.44, - "Tatoeba (tat-Cyrl_eng-Latn)": 85.92, - "Tatoeba (wuu-Hans_eng-Latn)": 90.18, - "Tatoeba (war-Latn_eng-Latn)": 60.29, - "Tatoeba (kat-Geor_eng-Latn)": 95.02, - "Tatoeba (nds-Latn_eng-Latn)": 79.42, - "Tatoeba (gle-Latn_eng-Latn)": 93.8, - "Tatoeba (mal-Mlym_eng-Latn)": 98.45, - "Tatoeba (tha-Thai_eng-Latn)": 96.14, - "Tatoeba (fin-Latn_eng-Latn)": 96.37, - "Tatoeba (cor-Latn_eng-Latn)": 10.11, - "Tatoeba (tur-Latn_eng-Latn)": 98.0, - "Tatoeba (yid-Hebr_eng-Latn)": 88.79, - "Tatoeba (eus-Latn_eng-Latn)": 95.01, - "Tatoeba (cmn-Hans_eng-Latn)": 95.1, - "Tatoeba (fao-Latn_eng-Latn)": 87.4, - "Tatoeba (lfn-Latn_eng-Latn)": 67.54, - "Tatoeba (bos-Latn_eng-Latn)": 94.92, - "Tatoeba (arq-Arab_eng-Latn)": 42.69, - "Tatoeba (sqi-Latn_eng-Latn)": 96.76, - "Tatoeba (ben-Beng_eng-Latn)": 88.55, - "Tatoeba (pes-Arab_eng-Latn)": 94.7, - "Tatoeba (fra-Latn_eng-Latn)": 94.86, - "Tatoeba (deu-Latn_eng-Latn)": 99.2, - "Tatoeba (spa-Latn_eng-Latn)": 98.4, - "Tatoeba (oci-Latn_eng-Latn)": 65.81, - "Tatoeba (ina-Latn_eng-Latn)": 95.37, - "Tatoeba (hrv-Latn_eng-Latn)": 96.95, - "Tatoeba (gsw-Latn_eng-Latn)": 46.5, - "Tatoeba (swe-Latn_eng-Latn)": 95.63, - "Tatoeba (bre-Latn_eng-Latn)": 15.07, - "Tatoeba (hun-Latn_eng-Latn)": 96.55, - "Tatoeba (uig-Arab_eng-Latn)": 92.4, - "Tatoeba (mar-Deva_eng-Latn)": 92.65, - "Tatoeba (nob-Latn_eng-Latn)": 98.4, - "Tatoeba (rus-Cyrl_eng-Latn)": 93.75, - "Tatoeba (ceb-Latn_eng-Latn)": 64.42, - "Tatoeba (aze-Latn_eng-Latn)": 94.93, - "Tatoeba (dsb-Latn_eng-Latn)": 64.81, - "Tatoeba (tam-Taml_eng-Latn)": 89.0, - "Tatoeba (est-Latn_eng-Latn)": 96.55, - "Tatoeba (cym-Latn_eng-Latn)": 92.0, - "Tatoeba (amh-Ethi_eng-Latn)": 91.47, - "Tatoeba (dan-Latn_eng-Latn)": 95.71, - "Tatoeba (epo-Latn_eng-Latn)": 98.2, - "Tatoeba (ast-Latn_eng-Latn)": 90.68, - "Tatoeba (swg-Latn_eng-Latn)": 59.36, - "Tatoeba (pms-Latn_eng-Latn)": 64.57, - "Tatoeba (kur-Latn_eng-Latn)": 83.59, - "Tatoeba (khm-Khmr_eng-Latn)": 78.37, - "Tatoeba (ido-Latn_eng-Latn)": 89.42, - "Tatoeba (ukr-Cyrl_eng-Latn)": 93.97, - "Tatoeba (mon-Cyrl_eng-Latn)": 95.91, - "Tatoeba (nov-Latn_eng-Latn)": 74.38, - "Tatoeba (cha-Latn_eng-Latn)": 31.77, - "Tatoeba (pam-Latn_eng-Latn)": 10.73, - "Tatoeba (lvs-Latn_eng-Latn)": 95.88, - "Tatoeba (max-Deva_eng-Latn)": 63.26, - "Tatoeba (lit-Latn_eng-Latn)": 96.47, - "Tatoeba (glg-Latn_eng-Latn)": 96.82 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "LaBSE", - "AllegroReviews": 34.89, - "AllegroReviews (pol-Latn)": 34.86, - "AmazonCounterfactualClassification (de)": 73.17, - "AmazonCounterfactualClassification (en)": 75.9, - "AmazonCounterfactualClassification (en-ext)": 76.12, - "AmazonCounterfactualClassification (ja)": 76.42, - "AmazonCounterfactualClassification (deu-Latn)": 73.17, - "AmazonCounterfactualClassification (jpn-Jpan)": 76.4, - "AmazonPolarityClassification": 68.94, - "AmazonReviewsClassification (de)": 39.92, - "AmazonReviewsClassification (en)": 35.81, - "AmazonReviewsClassification (es)": 39.39, - "AmazonReviewsClassification (fr)": 38.52, - "AmazonReviewsClassification (ja)": 36.44, - "AmazonReviewsClassification (zh)": 36.45, - "AmazonReviewsClassification (deu-Latn)": 39.93, - "AmazonReviewsClassification (spa-Latn)": 39.39, - "AmazonReviewsClassification (fra-Latn)": 38.53, - "AmazonReviewsClassification (jpn-Jpan)": 36.45, - "AmazonReviewsClassification (cmn-Hans)": 36.45, - "AngryTweetsClassification (dan-Latn)": 51.11, - "Banking77Classification": 69.85, - "CBD": 65.71, - "CBD (pol-Latn)": 65.74, - "DanishPoliticalCommentsClassification (dan-Latn)": 38.34, - "EmotionClassification": 37.22, - "GeoreviewClassification (rus-Cyrl)": 40.86, - "HeadlineClassification (rus-Cyrl)": 68.75, - "IFlyTek (cmn-Hans)": 43.19, - "ImdbClassification": 62.04, - "InappropriatenessClassification (rus-Cyrl)": 58.52, - "JDReview (cmn-Hans)": 79.14, - "KinopoiskClassification (rus-Cyrl)": 46.77, - "LccSentimentClassification (dan-Latn)": 50.07, - "MTOPDomainClassification (de)": 86.95, - "MTOPDomainClassification (en)": 86.08, - "MTOPDomainClassification (es)": 84.07, - "MTOPDomainClassification (fr)": 84.14, - "MTOPDomainClassification (hi)": 85.11, - "MTOPDomainClassification (th)": 81.24, - "MTOPDomainClassification (deu-Latn)": 86.93, - "MTOPDomainClassification (spa-Latn)": 84.06, - "MTOPDomainClassification (fra-Latn)": 84.14, - "MTOPDomainClassification (hin-Deva)": 85.11, - "MTOPDomainClassification (tha-Thai)": 81.24, - "MTOPIntentClassification (de)": 63.42, - "MTOPIntentClassification (en)": 63.07, - "MTOPIntentClassification (es)": 64.44, - "MTOPIntentClassification (fr)": 62.01, - "MTOPIntentClassification (hi)": 62.58, - "MTOPIntentClassification (th)": 64.61, - "MTOPIntentClassification (deu-Latn)": 63.46, - "MTOPIntentClassification (spa-Latn)": 64.46, - "MTOPIntentClassification (fra-Latn)": 62.05, - "MTOPIntentClassification (hin-Deva)": 62.61, - "MTOPIntentClassification (tha-Thai)": 64.7, - "MasakhaNEWSClassification (fra)": 77.39, - "MasakhaNEWSClassification (amh-Ethi)": 81.78, - "MasakhaNEWSClassification (eng)": 77.77, - "MasakhaNEWSClassification (fra-Latn)": 72.09, - "MasakhaNEWSClassification (hau-Latn)": 73.12, - "MasakhaNEWSClassification (ibo-Latn)": 69.1, - "MasakhaNEWSClassification (lin-Latn)": 74.63, - "MasakhaNEWSClassification (lug-Latn)": 57.44, - "MasakhaNEWSClassification (orm-Ethi)": 51.6, - "MasakhaNEWSClassification (pcm-Latn)": 91.44, - "MasakhaNEWSClassification (run-Latn)": 73.76, - "MasakhaNEWSClassification (sna-Latn)": 87.18, - "MasakhaNEWSClassification (som-Latn)": 60.03, - "MasakhaNEWSClassification (swa-Latn)": 69.33, - "MasakhaNEWSClassification (tir-Ethi)": 61.73, - "MasakhaNEWSClassification (xho-Latn)": 77.34, - "MasakhaNEWSClassification (yor-Latn)": 77.13, - "MassiveIntentClassification (af)": 56.12, - "MassiveIntentClassification (am)": 55.71, - "MassiveIntentClassification (ar)": 50.86, - "MassiveIntentClassification (az)": 58.97, - "MassiveIntentClassification (bn)": 58.22, - "MassiveIntentClassification (cy)": 50.16, - "MassiveIntentClassification (da)": 58.25, - "MassiveIntentClassification (de)": 56.21, - "MassiveIntentClassification (el)": 57.03, - "MassiveIntentClassification (en)": 61.44, - "MassiveIntentClassification (es)": 58.32, - "MassiveIntentClassification (fa)": 62.33, - "MassiveIntentClassification (fi)": 60.12, - "MassiveIntentClassification (fr)": 60.47, - "MassiveIntentClassification (he)": 56.55, - "MassiveIntentClassification (hi)": 59.4, - "MassiveIntentClassification (hu)": 59.52, - "MassiveIntentClassification (hy)": 56.2, - "MassiveIntentClassification (id)": 61.12, - "MassiveIntentClassification (is)": 54.9, - "MassiveIntentClassification (it)": 59.83, - "MassiveIntentClassification (ja)": 63.11, - "MassiveIntentClassification (jv)": 50.98, - "MassiveIntentClassification (ka)": 48.35, - "MassiveIntentClassification (km)": 48.55, - "MassiveIntentClassification (kn)": 56.24, - "MassiveIntentClassification (ko)": 60.99, - "MassiveIntentClassification (lv)": 57.1, - "MassiveIntentClassification (ml)": 57.91, - "MassiveIntentClassification (mn)": 58.5, - "MassiveIntentClassification (ms)": 58.6, - "MassiveIntentClassification (my)": 57.35, - "MassiveIntentClassification (nb)": 57.91, - "MassiveIntentClassification (nl)": 59.37, - "MassiveIntentClassification (pl)": 59.71, - "MassiveIntentClassification (pt)": 60.16, - "MassiveIntentClassification (ro)": 57.92, - "MassiveIntentClassification (ru)": 60.67, - "MassiveIntentClassification (sl)": 59.37, - "MassiveIntentClassification (sq)": 58.03, - "MassiveIntentClassification (sv)": 59.66, - "MassiveIntentClassification (sw)": 51.62, - "MassiveIntentClassification (ta)": 55.04, - "MassiveIntentClassification (te)": 58.32, - "MassiveIntentClassification (th)": 56.58, - "MassiveIntentClassification (tl)": 55.28, - "MassiveIntentClassification (tr)": 60.91, - "MassiveIntentClassification (ur)": 56.7, - "MassiveIntentClassification (vi)": 56.67, - "MassiveIntentClassification (zh-CN)": 63.86, - "MassiveIntentClassification (zh-TW)": 59.51, - "MassiveIntentClassification (msa-Latn)": 58.59, - "MassiveIntentClassification (slv-Latn)": 59.39, - "MassiveIntentClassification (hun-Latn)": 59.52, - "MassiveIntentClassification (swe-Latn)": 59.64, - "MassiveIntentClassification (nld-Latn)": 59.31, - "MassiveIntentClassification (ind-Latn)": 61.14, - "MassiveIntentClassification (vie-Latn)": 56.68, - "MassiveIntentClassification (cmo-Hans)": 63.85, - "MassiveIntentClassification (amh-Ethi)": 55.67, - "MassiveIntentClassification (kor-Kore)": 60.93, - "MassiveIntentClassification (ell-Grek)": 56.96, - "MassiveIntentClassification (nob-Latn)": 57.81, - "MassiveIntentClassification (jav-Latn)": 50.94, - "MassiveIntentClassification (fas-Arab)": 62.33, - "MassiveIntentClassification (jpn-Jpan)": 63.13, - "MassiveIntentClassification (ita-Latn)": 59.75, - "MassiveIntentClassification (spa-Latn)": 58.26, - "MassiveIntentClassification (mya-Mymr)": 57.23, - "MassiveIntentClassification (fin-Latn)": 60.09, - "MassiveIntentClassification (heb-Hebr)": 56.42, - "MassiveIntentClassification (isl-Latn)": 54.83, - "MassiveIntentClassification (tgl-Latn)": 55.1, - "MassiveIntentClassification (kan-Knda)": 56.2, - "MassiveIntentClassification (urd-Arab)": 56.68, - "MassiveIntentClassification (mal-Mlym)": 57.75, - "MassiveIntentClassification (tur-Latn)": 60.91, - "MassiveIntentClassification (swa-Latn)": 51.5, - "MassiveIntentClassification (khm-Khmr)": 48.46, - "MassiveIntentClassification (ara-Arab)": 50.78, - "MassiveIntentClassification (rus-Cyrl)": 60.64, - "MassiveIntentClassification (tel-Telu)": 58.33, - "MassiveIntentClassification (tam-Taml)": 54.95, - "MassiveIntentClassification (por-Latn)": 60.15, - "MassiveIntentClassification (cmo-Hant)": 59.47, - "MassiveIntentClassification (pol-Latn)": 59.75, - "MassiveIntentClassification (aze-Latn)": 58.91, - "MassiveIntentClassification (ron-Latn)": 57.84, - "MassiveIntentClassification (kat-Geor)": 48.26, - "MassiveIntentClassification (sqi-Latn)": 58.08, - "MassiveIntentClassification (ben-Beng)": 58.14, - "MassiveIntentClassification (tha-Thai)": 56.45, - "MassiveIntentClassification (dan-Latn)": 58.23, - "MassiveIntentClassification (deu-Latn)": 56.16, - "MassiveIntentClassification (hye-Armn)": 56.22, - "MassiveIntentClassification (mon-Cyrl)": 58.46, - "MassiveIntentClassification (cym-Latn)": 50.17, - "MassiveIntentClassification (afr-Latn)": 56.07, - "MassiveIntentClassification (fra-Latn)": 60.42, - "MassiveIntentClassification (hin-Deva)": 59.37, - "MassiveIntentClassification (lav-Latn)": 57.06, - "MassiveScenarioClassification (af)": 63.39, - "MassiveScenarioClassification (am)": 62.02, - "MassiveScenarioClassification (ar)": 57.72, - "MassiveScenarioClassification (az)": 63.48, - "MassiveScenarioClassification (bn)": 61.84, - "MassiveScenarioClassification (cy)": 56.13, - "MassiveScenarioClassification (da)": 65.24, - "MassiveScenarioClassification (de)": 62.39, - "MassiveScenarioClassification (el)": 64.58, - "MassiveScenarioClassification (en)": 66.44, - "MassiveScenarioClassification (es)": 63.61, - "MassiveScenarioClassification (fa)": 67.46, - "MassiveScenarioClassification (fi)": 64.58, - "MassiveScenarioClassification (fr)": 65.1, - "MassiveScenarioClassification (he)": 63.53, - "MassiveScenarioClassification (hi)": 64.4, - "MassiveScenarioClassification (hu)": 65.82, - "MassiveScenarioClassification (hy)": 61.25, - "MassiveScenarioClassification (id)": 65.84, - "MassiveScenarioClassification (is)": 61.94, - "MassiveScenarioClassification (it)": 64.09, - "MassiveScenarioClassification (ja)": 67.72, - "MassiveScenarioClassification (jv)": 58.29, - "MassiveScenarioClassification (ka)": 53.38, - "MassiveScenarioClassification (km)": 56.18, - "MassiveScenarioClassification (kn)": 61.74, - "MassiveScenarioClassification (ko)": 67.26, - "MassiveScenarioClassification (lv)": 61.87, - "MassiveScenarioClassification (ml)": 62.26, - "MassiveScenarioClassification (mn)": 62.6, - "MassiveScenarioClassification (ms)": 65.63, - "MassiveScenarioClassification (my)": 62.94, - "MassiveScenarioClassification (nb)": 64.29, - "MassiveScenarioClassification (nl)": 65.16, - "MassiveScenarioClassification (pl)": 64.58, - "MassiveScenarioClassification (pt)": 63.28, - "MassiveScenarioClassification (ro)": 62.41, - "MassiveScenarioClassification (ru)": 65.25, - "MassiveScenarioClassification (sl)": 64.25, - "MassiveScenarioClassification (sq)": 64.54, - "MassiveScenarioClassification (sv)": 66.01, - "MassiveScenarioClassification (sw)": 58.36, - "MassiveScenarioClassification (ta)": 59.08, - "MassiveScenarioClassification (te)": 64.13, - "MassiveScenarioClassification (th)": 64.34, - "MassiveScenarioClassification (tl)": 60.23, - "MassiveScenarioClassification (tr)": 65.43, - "MassiveScenarioClassification (ur)": 61.52, - "MassiveScenarioClassification (vi)": 61.05, - "MassiveScenarioClassification (zh-CN)": 70.85, - "MassiveScenarioClassification (zh-TW)": 67.08, - "MassiveScenarioClassification (tel-Telu)": 64.12, - "MassiveScenarioClassification (deu-Latn)": 62.4, - "MassiveScenarioClassification (ell-Grek)": 64.57, - "MassiveScenarioClassification (mal-Mlym)": 62.24, - "MassiveScenarioClassification (nld-Latn)": 65.17, - "MassiveScenarioClassification (jav-Latn)": 58.29, - "MassiveScenarioClassification (ron-Latn)": 62.39, - "MassiveScenarioClassification (por-Latn)": 63.28, - "MassiveScenarioClassification (pol-Latn)": 64.55, - "MassiveScenarioClassification (slv-Latn)": 64.26, - "MassiveScenarioClassification (tam-Taml)": 59.07, - "MassiveScenarioClassification (swa-Latn)": 58.37, - "MassiveScenarioClassification (tgl-Latn)": 60.23, - "MassiveScenarioClassification (ind-Latn)": 65.86, - "MassiveScenarioClassification (rus-Cyrl)": 65.23, - "MassiveScenarioClassification (swe-Latn)": 65.99, - "MassiveScenarioClassification (ara-Arab)": 57.74, - "MassiveScenarioClassification (fra-Latn)": 65.1, - "MassiveScenarioClassification (tha-Thai)": 64.32, - "MassiveScenarioClassification (hye-Armn)": 61.29, - "MassiveScenarioClassification (nob-Latn)": 64.3, - "MassiveScenarioClassification (cym-Latn)": 56.12, - "MassiveScenarioClassification (mya-Mymr)": 62.94, - "MassiveScenarioClassification (ben-Beng)": 61.86, - "MassiveScenarioClassification (ita-Latn)": 64.09, - "MassiveScenarioClassification (kor-Kore)": 67.3, - "MassiveScenarioClassification (kan-Knda)": 61.74, - "MassiveScenarioClassification (heb-Hebr)": 63.52, - "MassiveScenarioClassification (amh-Ethi)": 62.02, - "MassiveScenarioClassification (kat-Geor)": 53.37, - "MassiveScenarioClassification (vie-Latn)": 61.06, - "MassiveScenarioClassification (urd-Arab)": 61.47, - "MassiveScenarioClassification (cmo-Hant)": 67.05, - "MassiveScenarioClassification (msa-Latn)": 65.62, - "MassiveScenarioClassification (isl-Latn)": 61.93, - "MassiveScenarioClassification (khm-Khmr)": 56.2, - "MassiveScenarioClassification (hun-Latn)": 65.82, - "MassiveScenarioClassification (fin-Latn)": 64.56, - "MassiveScenarioClassification (sqi-Latn)": 64.56, - "MassiveScenarioClassification (fas-Arab)": 67.43, - "MassiveScenarioClassification (afr-Latn)": 63.38, - "MassiveScenarioClassification (mon-Cyrl)": 62.62, - "MassiveScenarioClassification (dan-Latn)": 65.26, - "MassiveScenarioClassification (aze-Latn)": 63.47, - "MassiveScenarioClassification (spa-Latn)": 63.61, - "MassiveScenarioClassification (jpn-Jpan)": 67.7, - "MassiveScenarioClassification (tur-Latn)": 65.43, - "MassiveScenarioClassification (lav-Latn)": 61.86, - "MassiveScenarioClassification (cmo-Hans)": 70.84, - "MassiveScenarioClassification (hin-Deva)": 64.41, - "MultilingualSentiment (cmn-Hans)": 64.6, - "NoRecClassification (nob-Latn)": 45.45, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 35.39, - "OnlineShopping (cmn-Hans)": 85.63, - "PAC": 68.11, - "PAC (pol-Latn)": 68.09, - "PolEmo2.0-IN": 64.0, - "PolEmo2.0-IN (pol-Latn)": 63.91, - "PolEmo2.0-OUT": 44.72, - "PolEmo2.0-OUT (pol-Latn)": 44.76, - "RuReviewsClassification (rus-Cyrl)": 58.01, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 53.04, - "RuSciBenchOECDClassification (rus-Cyrl)": 40.48, - "TNews (cmn-Hans)": 46.02, - "ToxicConversationsClassification": 63.24, - "TweetSentimentExtractionClassification": 58.83, - "Waimai (cmn-Hans)": 82.85 + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "dragon-plus-instruct", + "ARCChallenge": 8.24, + "AlphaNLI": 25.18, + "HellaSwag": 24.06, + "PIQA": 26.35, + "Quail": 4.2, + "RARbCode": 12.84, + "RARbMath": 36.15, + "SIQA": 1.75, + "SpartQA": 10.82, + "TempReasonL1": 1.54, + "TempReasonL2Fact": 16.11, + "TempReasonL2Pure": 0.57, + "TempReasonL3Fact": 14.81, + "TempReasonL3Pure": 7.46, + "WinoGrande": 60.84 + } + ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "e5-base": { + "BitextMining": { + "f1": [ + { + "Model": "e5-base", + "BornholmBitextMining": 40.09 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "e5-base", + "AngryTweetsClassification": 45.06, + "DKHateClassification": 58.51, + "DanishPoliticalCommentsClassification": 28.43, + "LccSentimentClassification": 37.47, + "MassiveIntentClassification": 41.34, + "MassiveScenarioClassification": 50.0, + "NoRecClassification": 42.0, + "NordicLangClassification": 59.34, + "NorwegianParliament": 57.42, + "ScalaDaClassification": 50.08, + "ScalaNbClassification": 50.18 + } + ] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "e5-base", + "LEMBNarrativeQARetrieval": 25.31, + "LEMBQMSumRetrieval": 23.83, + "LEMBSummScreenFDRetrieval": 74.67, + "LEMBWikimQARetrieval": 55.85 + } + ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "e5-base-4k": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "e5-base-4k", + "LEMBNarrativeQARetrieval": 30.35, + "LEMBQMSumRetrieval": 35.6, + "LEMBSummScreenFDRetrieval": 95.23, + "LEMBWikimQARetrieval": 69.19 + } + ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "e5-base-v2": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [ + { + "Model": "e5-base-v2", + "BiorxivClusteringP2P": 37.12, + "BiorxivClusteringS2S": 33.41, + "MedrxivClusteringP2P": 31.82, + "MedrxivClusteringS2S": 29.68, + "RedditClustering": 56.54, + "RedditClusteringP2P": 63.23, + "StackExchangeClustering": 64.6, + "StackExchangeClusteringP2P": 33.02, + "TwentyNewsgroupsClustering": 49.86 + } + ] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "e5-base-v2", + "Core17InstructionRetrieval": -2.9, + "News21InstructionRetrieval": -2.0, + "Robust04InstructionRetrieval": -6.73 + } + ] + } + }, + "e5-large": { + "BitextMining": { + "f1": [ + { + "Model": "e5-large", + "BornholmBitextMining": 40.15 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "e5-large", + "AngryTweetsClassification": 46.14, + "DKHateClassification": 58.72, + "DanishPoliticalCommentsClassification": 28.67, + "LccSentimentClassification": 42.13, + "MassiveIntentClassification": 40.69, + "MassiveScenarioClassification": 50.97, + "NoRecClassification": 41.83, + "NordicLangClassification": 58.3, + "NorwegianParliament": 57.26, + "ScalaDaClassification": 49.9, + "ScalaNbClassification": 50.13 + } + ] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "e5-large-v2": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [ + { + "Model": "e5-large-v2", + "BiorxivClusteringP2P": 36.72, + "BiorxivClusteringS2S": 35.47, + "MedrxivClusteringP2P": 31.45, + "MedrxivClusteringS2S": 29.91, + "RedditClustering": 55.5, + "RedditClusteringP2P": 63.71, + "StackExchangeClustering": 65.23, + "StackExchangeClusteringP2P": 33.62, + "TwentyNewsgroupsClustering": 48.73 + } + ] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "e5-large-v2", + "Core17InstructionRetrieval": 0.12, + "News21InstructionRetrieval": 0.87, + "Robust04InstructionRetrieval": -4.16 + } + ] + } + }, + "e5-mistral-7b-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "e5-mistral-7b-instruct", + "BornholmBitextMining": 57.24, + "Tatoeba (arq-eng)": 48.55, + "Tatoeba (uig-eng)": 38.83, + "Tatoeba (csb-eng)": 62.92, + "Tatoeba (ces-eng)": 94.93, + "Tatoeba (urd-eng)": 83.62, + "Tatoeba (heb-eng)": 82.82, + "Tatoeba (gsw-eng)": 52.21, + "Tatoeba (dan-eng)": 93.92, + "Tatoeba (zsm-eng)": 94.23, + "Tatoeba (kab-eng)": 3.81, + "Tatoeba (bre-eng)": 14.2, + "Tatoeba (spa-eng)": 98.8, + "Tatoeba (nld-eng)": 96.4, + "Tatoeba (ber-eng)": 7.62, + "Tatoeba (cym-eng)": 72.32, + "Tatoeba (tel-eng)": 42.86, + "Tatoeba (ind-eng)": 93.48, + "Tatoeba (cbk-eng)": 81.32, + "Tatoeba (mal-eng)": 56.03, + "Tatoeba (bos-eng)": 91.54, + "Tatoeba (kor-eng)": 91.16, + "Tatoeba (cmn-eng)": 95.9, + "Tatoeba (glg-eng)": 90.44, + "Tatoeba (nov-eng)": 72.4, + "Tatoeba (est-eng)": 62.93, + "Tatoeba (max-eng)": 64.8, + "Tatoeba (ile-eng)": 83.97, + "Tatoeba (dtp-eng)": 11.74, + "Tatoeba (mon-eng)": 37.79, + "Tatoeba (jav-eng)": 38.38, + "Tatoeba (xho-eng)": 35.94, + "Tatoeba (yid-eng)": 33.12, + "Tatoeba (ang-eng)": 81.59, + "Tatoeba (pes-eng)": 89.67, + "Tatoeba (ceb-eng)": 43.15, + "Tatoeba (nob-eng)": 96.86, + "Tatoeba (ara-eng)": 88.76, + "Tatoeba (pms-eng)": 62.3, + "Tatoeba (swe-eng)": 92.95, + "Tatoeba (ina-eng)": 95.65, + "Tatoeba (kzj-eng)": 12.29, + "Tatoeba (por-eng)": 94.44, + "Tatoeba (bel-eng)": 88.09, + "Tatoeba (fao-eng)": 70.33, + "Tatoeba (tam-eng)": 72.83, + "Tatoeba (tat-eng)": 36.79, + "Tatoeba (vie-eng)": 94.83, + "Tatoeba (kaz-eng)": 46.88, + "Tatoeba (slv-eng)": 86.13, + "Tatoeba (lvs-eng)": 66.01, + "Tatoeba (ben-eng)": 81.82, + "Tatoeba (nno-eng)": 87.24, + "Tatoeba (tzl-eng)": 49.51, + "Tatoeba (ron-eng)": 93.0, + "Tatoeba (cha-eng)": 44.8, + "Tatoeba (mar-eng)": 68.99, + "Tatoeba (hin-eng)": 95.28, + "Tatoeba (aze-eng)": 76.23, + "Tatoeba (mkd-eng)": 86.9, + "Tatoeba (jpn-eng)": 94.0, + "Tatoeba (wuu-eng)": 89.61, + "Tatoeba (gle-eng)": 73.71, + "Tatoeba (arz-eng)": 70.73, + "Tatoeba (swg-eng)": 64.75, + "Tatoeba (rus-eng)": 93.75, + "Tatoeba (ukr-eng)": 94.58, + "Tatoeba (ell-eng)": 91.46, + "Tatoeba (hun-eng)": 89.8, + "Tatoeba (bul-eng)": 93.98, + "Tatoeba (oci-eng)": 62.2, + "Tatoeba (awa-eng)": 67.45, + "Tatoeba (uzb-eng)": 52.73, + "Tatoeba (kur-eng)": 29.93, + "Tatoeba (fra-eng)": 95.66, + "Tatoeba (ido-eng)": 74.98, + "Tatoeba (dsb-eng)": 65.43, + "Tatoeba (hsb-eng)": 74.4, + "Tatoeba (swh-eng)": 61.6, + "Tatoeba (hye-eng)": 63.99, + "Tatoeba (isl-eng)": 87.58, + "Tatoeba (pol-eng)": 96.47, + "Tatoeba (yue-eng)": 89.11, + "Tatoeba (khm-eng)": 37.31, + "Tatoeba (lfn-eng)": 73.89, + "Tatoeba (srp-eng)": 92.54, + "Tatoeba (lat-eng)": 87.8, + "Tatoeba (pam-eng)": 14.02, + "Tatoeba (ast-eng)": 80.94, + "Tatoeba (orv-eng)": 59.66, + "Tatoeba (cat-eng)": 91.89, + "Tatoeba (amh-eng)": 22.05, + "Tatoeba (deu-eng)": 99.47, + "Tatoeba (war-eng)": 41.97, + "Tatoeba (tha-eng)": 93.64, + "Tatoeba (nds-eng)": 77.42, + "Tatoeba (gla-eng)": 63.53, + "Tatoeba (kat-eng)": 59.67, + "Tatoeba (epo-eng)": 87.76, + "Tatoeba (cor-eng)": 9.19, + "Tatoeba (lit-eng)": 69.07, + "Tatoeba (mhr-eng)": 16.24, + "Tatoeba (slk-eng)": 88.84, + "Tatoeba (eus-eng)": 40.64, + "Tatoeba (hrv-eng)": 93.97, + "Tatoeba (tgl-eng)": 93.14, + "Tatoeba (sqi-eng)": 67.06, + "Tatoeba (ita-eng)": 91.94, + "Tatoeba (tur-eng)": 94.13, + "Tatoeba (tuk-eng)": 42.01, + "Tatoeba (fin-eng)": 90.69, + "Tatoeba (fry-eng)": 70.98, + "Tatoeba (afr-eng)": 87.32 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "e5-mistral-7b-instruct", + "AllegroReviews": 59.78, + "AmazonCounterfactualClassification (en-ext)": 74.6, + "AmazonCounterfactualClassification (en)": 74.57, + "AmazonCounterfactualClassification (de)": 71.25, + "AmazonCounterfactualClassification (ja)": 72.33, + "AmazonPolarityClassification": 96.26, + "AmazonReviewsClassification (en)": 55.97, + "AmazonReviewsClassification (de)": 54.94, + "AmazonReviewsClassification (es)": 51.62, + "AmazonReviewsClassification (fr)": 50.27, + "AmazonReviewsClassification (ja)": 51.32, + "AmazonReviewsClassification (zh)": 47.6, + "AmazonReviewsClassification": 36.71, + "AngryTweetsClassification": 65.01, + "Banking77Classification": 81.41, + "CBD": 72.59, + "DanishPoliticalCommentsClassification": 34.68, + "EmotionClassification": 58.42, + "GeoreviewClassification": 56.72, + "HeadlineClassification": 87.02, + "IFlyTek": 48.65, + "ImdbClassification": 94.73, + "InappropriatenessClassification": 70.36, + "JDReview": 84.69, + "KinopoiskClassification": 68.35, + "LccSentimentClassification": 64.53, + "MTOPDomainClassification (en)": 95.33, + "MTOPDomainClassification (de)": 90.48, + "MTOPDomainClassification (es)": 90.22, + "MTOPDomainClassification (fr)": 88.49, + "MTOPDomainClassification (hi)": 86.67, + "MTOPDomainClassification (th)": 83.42, + "MTOPDomainClassification": 74.8, + "MTOPIntentClassification (en)": 78.99, + "MTOPIntentClassification (de)": 69.37, + "MTOPIntentClassification (es)": 73.45, + "MTOPIntentClassification (fr)": 70.6, + "MTOPIntentClassification (hi)": 59.78, + "MTOPIntentClassification (th)": 62.24, + "MTOPIntentClassification": 53.97, + "MasakhaNEWSClassification (amh)": 53.11, + "MasakhaNEWSClassification (eng)": 85.89, + "MasakhaNEWSClassification (fra)": 82.94, + "MasakhaNEWSClassification (hau)": 76.73, + "MasakhaNEWSClassification (ibo)": 77.49, + "MasakhaNEWSClassification (lin)": 83.94, + "MasakhaNEWSClassification (lug)": 73.95, + "MasakhaNEWSClassification (orm)": 77.72, + "MasakhaNEWSClassification (pcm)": 96.03, + "MasakhaNEWSClassification (run)": 85.81, + "MasakhaNEWSClassification (sna)": 89.73, + "MasakhaNEWSClassification (som)": 66.19, + "MasakhaNEWSClassification (swa)": 79.5, + "MasakhaNEWSClassification (tir)": 33.05, + "MasakhaNEWSClassification (xho)": 87.71, + "MasakhaNEWSClassification (yor)": 85.55, + "MasakhaNEWSClassification": 80.59, + "MassiveIntentClassification (ro)": 63.81, + "MassiveIntentClassification (bn)": 58.21, + "MassiveIntentClassification (ja)": 71.72, + "MassiveIntentClassification (ko)": 69.85, + "MassiveIntentClassification (fi)": 64.37, + "MassiveIntentClassification (pl)": 71.03, + "MassiveIntentClassification (fa)": 69.02, + "MassiveIntentClassification (cy)": 46.83, + "MassiveIntentClassification (hy)": 46.34, + "MassiveIntentClassification (lv)": 49.61, + "MassiveIntentClassification (sl)": 63.04, + "MassiveIntentClassification (sq)": 48.78, + "MassiveIntentClassification (vi)": 66.52, + "MassiveIntentClassification (is)": 52.71, + "MassiveIntentClassification (pt)": 71.63, + "MassiveIntentClassification (hu)": 64.87, + "MassiveIntentClassification (hi)": 63.91, + "MassiveIntentClassification (it)": 71.35, + "MassiveIntentClassification (km)": 39.57, + "MassiveIntentClassification (es)": 70.88, + "MassiveIntentClassification (ur)": 57.26, + "MassiveIntentClassification (ar)": 57.67, + "MassiveIntentClassification (fr)": 71.66, + "MassiveIntentClassification (nb)": 65.35, + "MassiveIntentClassification (de)": 70.52, + "MassiveIntentClassification (az)": 58.14, + "MassiveIntentClassification (he)": 62.36, + "MassiveIntentClassification (zh-TW)": 65.89, + "MassiveIntentClassification (mn)": 40.83, + "MassiveIntentClassification (am)": 34.74, + "MassiveIntentClassification (nl)": 71.24, + "MassiveIntentClassification (sw)": 51.71, + "MassiveIntentClassification (th)": 61.29, + "MassiveIntentClassification (my)": 38.8, + "MassiveIntentClassification (zh-CN)": 72.46, + "MassiveIntentClassification (ka)": 45.05, + "MassiveIntentClassification (jv)": 50.08, + "MassiveIntentClassification (tl)": 62.43, + "MassiveIntentClassification (ml)": 41.09, + "MassiveIntentClassification (sv)": 70.41, + "MassiveIntentClassification (ta)": 44.28, + "MassiveIntentClassification (ms)": 66.36, + "MassiveIntentClassification (tr)": 66.9, + "MassiveIntentClassification (id)": 68.0, + "MassiveIntentClassification (ru)": 73.74, + "MassiveIntentClassification (te)": 45.47, + "MassiveIntentClassification (af)": 62.05, + "MassiveIntentClassification (el)": 64.04, + "MassiveIntentClassification (da)": 68.11, + "MassiveIntentClassification (en)": 77.0, + "MassiveIntentClassification (kn)": 45.41, + "MassiveIntentClassification": 46.39, + "MassiveScenarioClassification (id)": 73.78, + "MassiveScenarioClassification (km)": 48.42, + "MassiveScenarioClassification (pt)": 74.21, + "MassiveScenarioClassification (mn)": 48.26, + "MassiveScenarioClassification (ro)": 68.97, + "MassiveScenarioClassification (ru)": 77.1, + "MassiveScenarioClassification (fi)": 68.83, + "MassiveScenarioClassification (tl)": 70.87, + "MassiveScenarioClassification (cy)": 58.84, + "MassiveScenarioClassification (hy)": 53.61, + "MassiveScenarioClassification (am)": 42.25, + "MassiveScenarioClassification (tr)": 70.73, + "MassiveScenarioClassification (ka)": 54.37, + "MassiveScenarioClassification (es)": 74.94, + "MassiveScenarioClassification (nl)": 76.48, + "MassiveScenarioClassification (sl)": 70.59, + "MassiveScenarioClassification (nb)": 72.48, + "MassiveScenarioClassification (jv)": 61.38, + "MassiveScenarioClassification (az)": 63.77, + "MassiveScenarioClassification (th)": 69.67, + "MassiveScenarioClassification (ar)": 67.15, + "MassiveScenarioClassification (bn)": 64.73, + "MassiveScenarioClassification (de)": 77.22, + "MassiveScenarioClassification (ko)": 74.41, + "MassiveScenarioClassification (zh-TW)": 71.62, + "MassiveScenarioClassification (te)": 53.65, + "MassiveScenarioClassification (ml)": 49.48, + "MassiveScenarioClassification (ta)": 52.58, + "MassiveScenarioClassification (hu)": 70.97, + "MassiveScenarioClassification (el)": 69.77, + "MassiveScenarioClassification (zh-CN)": 76.4, + "MassiveScenarioClassification (fa)": 74.61, + "MassiveScenarioClassification (da)": 73.77, + "MassiveScenarioClassification (kn)": 55.85, + "MassiveScenarioClassification (fr)": 75.65, + "MassiveScenarioClassification (ms)": 72.85, + "MassiveScenarioClassification (ja)": 75.79, + "MassiveScenarioClassification (hi)": 69.65, + "MassiveScenarioClassification (my)": 46.32, + "MassiveScenarioClassification (en)": 79.13, + "MassiveScenarioClassification (lv)": 58.4, + "MassiveScenarioClassification (pl)": 75.09, + "MassiveScenarioClassification (sq)": 58.29, + "MassiveScenarioClassification (ur)": 64.12, + "MassiveScenarioClassification (is)": 63.59, + "MassiveScenarioClassification (sv)": 76.81, + "MassiveScenarioClassification (he)": 65.48, + "MassiveScenarioClassification (af)": 71.0, + "MassiveScenarioClassification (it)": 74.88, + "MassiveScenarioClassification (sw)": 62.31, + "MassiveScenarioClassification (vi)": 71.92, + "MassiveScenarioClassification": 53.86, + "MultilingualSentiment": 73.47, + "NoRecClassification": 55.0, + "NordicLangClassification": 69.13, + "OnlineShopping": 92.56, + "PAC": 62.26, + "PolEmo2.0-IN": 85.58, + "PolEmo2.0-OUT": 60.55, + "RuReviewsClassification": 70.57, + "RuSciBenchGRNTIClassification": 66.05, + "RuSciBenchOECDClassification": 52.11, + "TNews": 50.58, + "ToxicConversationsClassification": 71.75, + "TweetSentimentExtractionClassification": 64.89, + "Waimai": 87.79 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "e5-mistral-7b-instruct", + "AlloProfClusteringP2P": 61.06, + "AlloProfClusteringS2S": 28.12, + "GeoreviewClusteringP2P": 76.32, + "HALClusteringS2S": 19.69, + "MLSUMClusteringP2P": 45.59, + "MLSUMClusteringS2S": 32.0, + "MasakhaNEWSClusteringP2P (amh)": 47.57, + "MasakhaNEWSClusteringP2P (eng)": 71.97, + "MasakhaNEWSClusteringP2P (fra)": 81.18, + "MasakhaNEWSClusteringP2P (hau)": 72.23, + "MasakhaNEWSClusteringP2P (ibo)": 66.65, + "MasakhaNEWSClusteringP2P (lin)": 75.85, + "MasakhaNEWSClusteringP2P (lug)": 53.69, + "MasakhaNEWSClusteringP2P (orm)": 52.91, + "MasakhaNEWSClusteringP2P (pcm)": 92.6, + "MasakhaNEWSClusteringP2P (run)": 62.47, + "MasakhaNEWSClusteringP2P (sna)": 82.99, + "MasakhaNEWSClusteringP2P (som)": 41.28, + "MasakhaNEWSClusteringP2P (swa)": 50.54, + "MasakhaNEWSClusteringP2P (tir)": 46.94, + "MasakhaNEWSClusteringP2P (xho)": 59.17, + "MasakhaNEWSClusteringP2P (yor)": 65.86, + "MasakhaNEWSClusteringP2P": 52.47, + "MasakhaNEWSClusteringS2S (amh)": 47.55, + "MasakhaNEWSClusteringS2S (eng)": 74.23, + "MasakhaNEWSClusteringS2S (fra)": 74.9, + "MasakhaNEWSClusteringS2S (hau)": 40.78, + "MasakhaNEWSClusteringS2S (ibo)": 59.84, + "MasakhaNEWSClusteringS2S (lin)": 80.96, + "MasakhaNEWSClusteringS2S (lug)": 46.03, + "MasakhaNEWSClusteringS2S (orm)": 36.15, + "MasakhaNEWSClusteringS2S (pcm)": 92.94, + "MasakhaNEWSClusteringS2S (run)": 62.3, + "MasakhaNEWSClusteringS2S (sna)": 52.03, + "MasakhaNEWSClusteringS2S (som)": 36.18, + "MasakhaNEWSClusteringS2S (swa)": 35.33, + "MasakhaNEWSClusteringS2S (tir)": 43.92, + "MasakhaNEWSClusteringS2S (xho)": 26.61, + "MasakhaNEWSClusteringS2S (yor)": 63.18, + "MasakhaNEWSClusteringS2S": 49.2, + "RuSciBenchGRNTIClusteringP2P": 62.27, + "RuSciBenchOECDClusteringP2P": 54.13 } ] }, - "Clustering": { - "v_measure": [ + "PairClassification": { + "max_ap": [ { - "Model": "LaBSE", - "8TagsClustering": 12.96, - "AlloProfClusteringP2P": 54.78, - "AlloProfClusteringS2S": 31.6, - "ArxivClusteringP2P": 32.13, - "ArxivClusteringS2S": 22.05, - "BiorxivClusteringP2P": 29.98, - "BiorxivClusteringS2S": 20.24, - "GeoreviewClusteringP2P (rus-Cyrl)": 52.19, - "HALClusteringS2S": 20.62, - "MLSUMClusteringP2P": 42.09, - "MLSUMClusteringP2P (rus-Cyrl)": 39.45, - "MLSUMClusteringS2S": 34.84, - "MLSUMClusteringS2S (rus-Cyrl)": 35.77, - "MasakhaNEWSClusteringP2P (fra)": 46.16, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 67.78, - "MasakhaNEWSClusteringP2P (eng)": 48.16, - "MasakhaNEWSClusteringP2P (fra-Latn)": 46.16, - "MasakhaNEWSClusteringP2P (hau-Latn)": 39.77, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 62.67, - "MasakhaNEWSClusteringP2P (lin-Latn)": 62.98, - "MasakhaNEWSClusteringP2P (lug-Latn)": 47.76, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.76, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 77.16, - "MasakhaNEWSClusteringP2P (run-Latn)": 60.36, - "MasakhaNEWSClusteringP2P (sna-Latn)": 63.57, - "MasakhaNEWSClusteringP2P (som-Latn)": 34.94, - "MasakhaNEWSClusteringP2P (swa-Latn)": 27.26, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 51.59, - "MasakhaNEWSClusteringP2P (xho-Latn)": 45.32, - "MasakhaNEWSClusteringP2P (yor-Latn)": 48.73, - "MasakhaNEWSClusteringS2S (fra)": 38.13, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 52.73, - "MasakhaNEWSClusteringS2S (eng)": 32.6, - "MasakhaNEWSClusteringS2S (fra-Latn)": 38.13, - "MasakhaNEWSClusteringS2S (hau-Latn)": 31.62, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 32.27, - "MasakhaNEWSClusteringS2S (lin-Latn)": 49.38, - "MasakhaNEWSClusteringS2S (lug-Latn)": 47.63, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 25.05, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.18, - "MasakhaNEWSClusteringS2S (run-Latn)": 52.39, - "MasakhaNEWSClusteringS2S (sna-Latn)": 46.9, - "MasakhaNEWSClusteringS2S (som-Latn)": 24.08, - "MasakhaNEWSClusteringS2S (swa-Latn)": 15.83, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 49.07, - "MasakhaNEWSClusteringS2S (xho-Latn)": 28.52, - "MasakhaNEWSClusteringS2S (yor-Latn)": 32.26, - "MedrxivClusteringP2P": 29.84, - "MedrxivClusteringS2S": 24.89, - "RedditClustering": 28.53, - "RedditClusteringP2P": 49.23, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 49.07, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.97, - "StackExchangeClustering": 35.01, - "StackExchangeClusteringP2P": 28.44, - "TwentyNewsgroupsClustering": 22.53 + "Model": "e5-mistral-7b-instruct", + "CDSC-E": 75.86, + "OpusparcusPC (de)": 97.63, + "OpusparcusPC (en)": 99.1, + "OpusparcusPC (fi)": 92.76, + "OpusparcusPC (fr)": 95.23, + "OpusparcusPC (ru)": 91.44, + "OpusparcusPC (sv)": 95.54, + "PSC": 99.47, + "PawsXPairClassification (de)": 58.47, + "PawsXPairClassification (en)": 67.1, + "PawsXPairClassification (es)": 60.03, + "PawsXPairClassification (fr)": 61.85, + "PawsXPairClassification (ja)": 51.95, + "PawsXPairClassification (ko)": 53.11, + "PawsXPairClassification (zh)": 59.29, + "SICK-E-PL": 79.94, + "SprintDuplicateQuestions": 95.66, + "TERRa": 60.81, + "TwitterSemEval2015": 81.62, + "TwitterURLCorpus": 87.75 + }, + { + "Model": "e5-mistral-7b-instruct", + "CDSC-E": 75.86, + "OpusparcusPC (de)": 97.63, + "OpusparcusPC (en)": 99.1, + "OpusparcusPC (fi)": 92.76, + "OpusparcusPC (fr)": 95.23, + "OpusparcusPC (ru)": 91.44, + "OpusparcusPC (sv)": 95.54, + "OpusparcusPC": 89.71, + "PSC": 99.53, + "PawsXPairClassification (de)": 58.67, + "PawsXPairClassification (en)": 67.34, + "PawsXPairClassification (es)": 60.24, + "PawsXPairClassification (fr)": 62.03, + "PawsXPairClassification (ja)": 52.15, + "PawsXPairClassification (ko)": 53.14, + "PawsXPairClassification (zh)": 59.4, + "PawsXPairClassification": 64.33, + "SICK-E-PL": 79.94, + "SprintDuplicateQuestions": 95.86, + "TERRa": 60.81, + "TwitterSemEval2015": 81.62, + "TwitterURLCorpus": 87.79 + }, + { + "Model": "e5-mistral-7b-instruct", + "OpusparcusPC": 88.5, + "PawsXPairClassification": 63.65 } ] }, - "PairClassification": { - "max_ap": [ + "Reranking": { + "map": [ { - "Model": "LaBSE", - "CDSC-E": 68.91, - "OpusparcusPC (fr)": 93.96, - "PPC": 86.97, - "PSC": 97.42, - "PawsXPairClassification (fr)": 54.63, - "SICK-E-PL": 63.77, - "SprintDuplicateQuestions": 89.26, - "TwitterSemEval2015": 62.78, - "TwitterURLCorpus": 84.58 + "Model": "e5-mistral-7b-instruct", + "AlloprofReranking": 47.36, + "AskUbuntuDupQuestions": 66.98, + "MMarcoReranking": 24.21, + "MindSmallReranking": 32.59, + "RuBQReranking": 76.32, + "SciDocsRR": 86.34, + "StackOverflowDupQuestions": 54.93, + "SyntecReranking": 77.05, + "T2Reranking": 66.9 }, { - "Model": "LaBSE", - "CDSC-E": 68.92, - "CDSC-E (pol-Latn)": 68.92, - "OpusparcusPC (fr)": 93.96, - "OpusparcusPC (deu-Latn)": 96.58, - "OpusparcusPC (en)": 98.12, - "OpusparcusPC (fin-Latn)": 94.44, - "OpusparcusPC (fra-Latn)": 93.96, - "OpusparcusPC (rus-Cyrl)": 87.3, - "OpusparcusPC (swe-Latn)": 93.69, - "PPC": 86.97, - "PSC": 97.42, - "PSC (pol-Latn)": 97.42, - "PawsXPairClassification (fr)": 54.69, - "PawsXPairClassification (deu-Latn)": 51.45, - "PawsXPairClassification (en)": 54.07, - "PawsXPairClassification (spa-Latn)": 52.19, - "PawsXPairClassification (fra-Latn)": 54.7, - "PawsXPairClassification (jpn-Hira)": 47.74, - "PawsXPairClassification (kor-Hang)": 49.42, - "PawsXPairClassification (cmn-Hans)": 54.55, - "SICK-E-PL": 63.77, - "SICK-E-PL (pol-Latn)": 63.77, - "SprintDuplicateQuestions": 89.26, - "TERRa (rus-Cyrl)": 55.71, - "TwitterSemEval2015": 62.78, - "TwitterURLCorpus": 84.58 + "Model": "e5-mistral-7b-instruct", + "MIRACLReranking (ru)": 63.61 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "e5-mistral-7b-instruct", + "AILACasedocs": 38.76, + "AILAStatutes": 38.07, + "ARCChallenge": 19.0, + "AlloprofRetrieval": 16.46, + "AlphaNLI": 26.02, + "AppsRetrieval": 23.46, + "ArguAna": 61.65, + "ArguAna-PL": 49.2, + "BSARDRetrieval": 0.0, + "BrightRetrieval (sustainable_living)": 18.51, + "BrightRetrieval (economics)": 15.49, + "BrightRetrieval (theoremqa_theorems)": 25.09, + "BrightRetrieval (aops)": 7.1, + "BrightRetrieval (theoremqa_questions)": 23.94, + "BrightRetrieval (stackoverflow)": 9.83, + "BrightRetrieval (psychology)": 15.79, + "BrightRetrieval (pony)": 4.81, + "BrightRetrieval (leetcode)": 28.72, + "BrightRetrieval (biology)": 18.84, + "BrightRetrieval (earth_science)": 25.96, + "BrightRetrieval (robotics)": 16.37, + "CmedqaRetrieval": 34.23, + "CodeFeedbackMT": 36.4, + "CodeFeedbackST": 76.41, + "CodeSearchNetCCRetrieval (python)": 90.6, + "CodeSearchNetCCRetrieval (javascript)": 86.18, + "CodeSearchNetCCRetrieval (go)": 84.05, + "CodeSearchNetCCRetrieval (ruby)": 85.89, + "CodeSearchNetCCRetrieval (java)": 86.21, + "CodeSearchNetCCRetrieval (php)": 75.95, + "CodeSearchNetRetrieval (python)": 91.75, + "CodeSearchNetRetrieval (javascript)": 80.93, + "CodeSearchNetRetrieval (go)": 93.06, + "CodeSearchNetRetrieval (ruby)": 85.37, + "CodeSearchNetRetrieval (java)": 84.08, + "CodeSearchNetRetrieval (php)": 83.14, + "CodeTransOceanContest": 88.58, + "CodeTransOceanDL": 31.74, + "CosQA": 33.1, + "CovidRetrieval": 73.11, + "DuRetrieval": 87.04, + "EcomRetrieval": 45.94, + "FiQA-PL": 35.34, + "FiQA2018": 56.81, + "GerDaLIRSmall": 37.18, + "HellaSwag": 35.37, + "LEMBNarrativeQARetrieval": 44.62, + "LEMBQMSumRetrieval": 43.63, + "LEMBSummScreenFDRetrieval": 96.82, + "LEMBWikimQARetrieval": 82.11, + "LeCaRDv2": 68.56, + "LegalBenchConsumerContractsQA": 75.46, + "LegalBenchCorporateLobbying": 94.01, + "LegalQuAD": 59.64, + "LegalSummarization": 66.51, + "MIRACLRetrieval (ru)": 67.66, + "MMarcoRetrieval": 74.84, + "MedicalRetrieval": 52.83, + "MintakaRetrieval (ar)": 24.33, + "MintakaRetrieval (de)": 46.42, + "MintakaRetrieval (es)": 44.42, + "MintakaRetrieval (fr)": 46.07, + "MintakaRetrieval (hi)": 25.5, + "MintakaRetrieval (it)": 43.36, + "MintakaRetrieval (ja)": 33.72, + "MintakaRetrieval (pt)": 47.49, + "MintakaRetrieval": 3.57, + "NFCorpus": 38.58, + "NFCorpus-PL": 30.97, + "PIQA": 39.83, + "Quail": 7.28, + "RARbCode": 79.77, + "RARbMath": 72.0, + "RiaNewsRetrieval": 78.94, + "RuBQRetrieval": 75.98, + "SCIDOCS": 16.32, + "SCIDOCS-PL": 16.9, + "SIQA": 5.68, + "SciFact": 76.42, + "SciFact-PL": 68.11, + "SpartQA": 10.03, + "StackOverflowQA": 91.02, + "SyntecRetrieval": 55.9, + "SyntheticText2SQL": 59.2, + "T2Retrieval": 80.68, + "TRECCOVID": 87.03, + "TRECCOVID-PL": 73.58, + "TempReasonL1": 3.6, + "TempReasonL2Fact": 36.19, + "TempReasonL2Pure": 8.93, + "TempReasonL3Fact": 29.98, + "TempReasonL3Pure": 14.15, + "Touche2020": 26.27, + "VideoRetrieval": 45.34, + "WinoGrande": 39.51, + "XPQARetrieval (ara-ara)": 45.94, + "XPQARetrieval (eng-ara)": 30.38, + "XPQARetrieval (ara-eng)": 41.53, + "XPQARetrieval (deu-deu)": 77.83, + "XPQARetrieval (eng-deu)": 41.58, + "XPQARetrieval (deu-eng)": 72.43, + "XPQARetrieval (spa-spa)": 60.56, + "XPQARetrieval (eng-spa)": 29.4, + "XPQARetrieval (spa-eng)": 58.02, + "XPQARetrieval (fra-fra)": 69.72, + "XPQARetrieval (eng-fra)": 38.41, + "XPQARetrieval (fra-eng)": 64.51, + "XPQARetrieval (hin-hin)": 73.77, + "XPQARetrieval (eng-hin)": 19.11, + "XPQARetrieval (hin-eng)": 66.29, + "XPQARetrieval (ita-ita)": 74.27, + "XPQARetrieval (eng-ita)": 26.44, + "XPQARetrieval (ita-eng)": 67.28, + "XPQARetrieval (jpn-jpn)": 72.92, + "XPQARetrieval (eng-jpn)": 39.98, + "XPQARetrieval (jpn-eng)": 69.65, + "XPQARetrieval (kor-kor)": 39.18, + "XPQARetrieval (eng-kor)": 30.34, + "XPQARetrieval (kor-eng)": 33.83, + "XPQARetrieval (pol-pol)": 46.33, + "XPQARetrieval (eng-pol)": 32.92, + "XPQARetrieval (pol-eng)": 43.03, + "XPQARetrieval (por-por)": 49.25, + "XPQARetrieval (eng-por)": 24.44, + "XPQARetrieval (por-eng)": 48.68, + "XPQARetrieval (tam-tam)": 43.15, + "XPQARetrieval (eng-tam)": 3.59, + "XPQARetrieval (tam-eng)": 20.34, + "XPQARetrieval (cmn-cmn)": 63.0, + "XPQARetrieval (eng-cmn)": 34.01, + "XPQARetrieval (cmn-eng)": 56.43, + "XPQARetrieval": 41.29 + } + ], + "recall_at_1": [ + { + "Model": "e5-mistral-7b-instruct", + "BrightRetrieval (pony)": 1.14, + "BrightRetrieval (robotics)": 17.33, + "BrightRetrieval (economics)": 26.21, + "BrightRetrieval (biology)": 29.93, + "BrightRetrieval (earth_science)": 36.28, + "BrightRetrieval (psychology)": 46.73, + "BrightRetrieval (sustainable_living)": 32.21, + "BrightRetrieval (stackoverflow)": 14.53 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "e5-mistral-7b-instruct", + "AFQMC": 38.99, + "ATEC": 42.84, + "BIOSSES": 85.5, + "BQ": 50.63, + "CDSC-R": 92.19, + "LCQMC": 75.48, + "PAWSX": 16.81, + "RUParaPhraserSTS": 76.17, + "RuSTSBenchmarkSTS": 84.13, + "SICK-R": 82.64, + "SICK-R-PL": 76.67, + "SICKFr": 80.99, + "STS12": 79.65, + "STS13": 88.43, + "STS14": 84.54, + "STS15": 90.42, + "STS16": 87.69, + "STS17 (ar-ar)": 81.87, + "STS17 (en-ar)": 77.95, + "STS17 (nl-en)": 88.25, + "STS17 (en-tr)": 72.59, + "STS17 (en-de)": 87.3, + "STS17 (ko-ko)": 83.69, + "STS17 (es-es)": 87.46, + "STS17 (en-en)": 91.76, + "STS17 (es-en)": 88.24, + "STS17 (fr-en)": 88.08, + "STS17 (it-en)": 89.69, + "STS22 (ru)": 60.83, + "STSB": 81.81, + "STSBenchmark": 88.6, + "STSBenchmarkMultilingualSTS (pl)": 83.62, + "STSBenchmarkMultilingualSTS (es)": 86.13, + "STSBenchmarkMultilingualSTS (ru)": 84.25, + "STSBenchmarkMultilingualSTS (nl)": 83.69, + "STSBenchmarkMultilingualSTS (de)": 85.37, + "STSBenchmarkMultilingualSTS (zh)": 82.61, + "STSBenchmarkMultilingualSTS (en)": 88.6, + "STSBenchmarkMultilingualSTS (fr)": 85.49, + "STSBenchmarkMultilingualSTS (it)": 84.43, + "STSBenchmarkMultilingualSTS (pt)": 84.57 }, { - "Model": "LaBSE", - "CDSC-E (pol-Latn)": 68.92, - "OpusparcusPC (deu-Latn)": 96.58, - "OpusparcusPC (en)": 98.12, - "OpusparcusPC (fin-Latn)": 94.44, - "OpusparcusPC (fra-Latn)": 93.96, - "OpusparcusPC (rus-Cyrl)": 87.3, - "OpusparcusPC (swe-Latn)": 93.69, - "PSC (pol-Latn)": 97.42, - "PawsXPairClassification (deu-Latn)": 51.07, - "PawsXPairClassification (en)": 54.07, - "PawsXPairClassification (spa-Latn)": 52.19, - "PawsXPairClassification (fra-Latn)": 54.63, - "PawsXPairClassification (jpn-Hira)": 47.56, - "PawsXPairClassification (kor-Hang)": 49.39, - "PawsXPairClassification (cmn-Hans)": 54.26, - "SICK-E-PL (pol-Latn)": 63.77, - "SprintDuplicateQuestions": 89.26, - "TERRa (rus-Cyrl)": 55.71, - "TwitterSemEval2015": 62.78, - "TwitterURLCorpus": 84.58 + "Model": "e5-mistral-7b-instruct", + "AFQMC": 38.99, + "ATEC": 42.84, + "BIOSSES": 85.5, + "BQ": 50.63, + "CDSC-R": 92.19, + "LCQMC": 75.48, + "PAWSX": 16.81, + "RUParaPhraserSTS": 76.17, + "RuSTSBenchmarkSTS": 84.13, + "SICK-R": 82.64, + "SICK-R-PL": 76.67, + "SICKFr": 80.99, + "STS12": 79.65, + "STS13": 88.43, + "STS14": 84.54, + "STS15": 90.42, + "STS16": 87.69, + "STS17 (ar-ar)": 81.87, + "STS17 (en-ar)": 77.95, + "STS17 (nl-en)": 88.25, + "STS17 (en-tr)": 72.59, + "STS17 (en-de)": 87.3, + "STS17 (ko-ko)": 83.69, + "STS17 (es-es)": 87.46, + "STS17 (en-en)": 91.76, + "STS17 (es-en)": 88.24, + "STS17 (fr-en)": 88.08, + "STS17 (it-en)": 89.69, + "STS22 (ru)": 60.83, + "STSB": 81.81, + "STSBenchmark": 88.6, + "STSBenchmarkMultilingualSTS (pl)": 83.62, + "STSBenchmarkMultilingualSTS (es)": 86.13, + "STSBenchmarkMultilingualSTS (ru)": 84.25, + "STSBenchmarkMultilingualSTS (nl)": 83.69, + "STSBenchmarkMultilingualSTS (de)": 85.37, + "STSBenchmarkMultilingualSTS (zh)": 82.61, + "STSBenchmarkMultilingualSTS (en)": 88.6, + "STSBenchmarkMultilingualSTS (fr)": 85.49, + "STSBenchmarkMultilingualSTS (it)": 84.43, + "STSBenchmarkMultilingualSTS (pt)": 84.57 + }, + { + "Model": "e5-mistral-7b-instruct", + "SICKFr": 64.39, + "STS22": 69.82, + "STSBenchmarkMultilingualSTS": 61.87 + } + ] + }, + "Summarization": { + "cosine_spearman": [ + { + "Model": "e5-mistral-7b-instruct", + "SummEval": 31.53, + "SummEvalFr": 31.05 + }, + { + "Model": "e5-mistral-7b-instruct", + "SummEval": 31.53, + "SummEvalFr": 31.05 + }, + { + "Model": "e5-mistral-7b-instruct", + "SummEvalFr": 32.22 + } + ] + }, + "MultilabelClassification": { + "accuracy": [ + { + "Model": "e5-mistral-7b-instruct", + "CEDRClassification": 51.94, + "SensitiveTopicsClassification": 33.92 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "e5-mistral-7b-instruct", + "Core17InstructionRetrieval": 0.09, + "News21InstructionRetrieval": -0.86, + "Robust04InstructionRetrieval": -9.59 + } + ] + } + }, + "e5-mistral-7b-instruct-noinstruct": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct", + "ARCChallenge": 20.48, + "AlphaNLI": 18.88, + "HellaSwag": 32.25, + "PIQA": 32.8, + "Quail": 6.25, + "RARbCode": 79.84, + "RARbMath": 76.19, + "SIQA": 5.08, + "SpartQA": 10.87, + "TempReasonL1": 3.04, + "TempReasonL2Fact": 35.63, + "TempReasonL2Pure": 9.32, + "TempReasonL3Fact": 30.41, + "TempReasonL3Pure": 14.39, + "WinoGrande": 45.18 + } + ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "e5-small": { + "BitextMining": { + "f1": [ + { + "Model": "e5-small", + "BornholmBitextMining": 40.27 } ] }, - "Reranking": { - "map": [ + "Classification": { + "accuracy": [ { - "Model": "LaBSE", - "AlloprofReranking": 49.51, - "AlloprofReranking (fra-Latn)": 55.37, - "AskUbuntuDupQuestions": 52.75, - "MMarcoReranking (cmn-Hans)": 14.83, - "MindSmallReranking": 29.81, - "RuBQReranking (rus-Cyrl)": 55.13, - "SciDocsRR": 68.72, - "StackOverflowDupQuestions": 42.42, - "SyntecReranking": 73.28, - "SyntecReranking (fra-Latn)": 67.62, - "T2Reranking (cmn-Hans)": 63.29 + "Model": "e5-small", + "AngryTweetsClassification": 43.6, + "DKHateClassification": 57.57, + "DanishPoliticalCommentsClassification": 28.37, + "LccSentimentClassification": 40.27, + "MassiveIntentClassification": 40.07, + "MassiveScenarioClassification": 47.06, + "NoRecClassification": 41.84, + "NordicLangClassification": 53.47, + "NorwegianParliament": 56.57, + "ScalaDaClassification": 50.15, + "ScalaNbClassification": 50.03 } ] }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "LaBSE", - "AILACasedocs": 17.67, - "AILAStatutes": 16.72, - "ARCChallenge": 3.78, - "AlloprofRetrieval": 19.77, - "AlloprofRetrieval (fra-Latn)": 19.77, - "AlphaNLI": 13.11, - "AppsRetrieval (eng-Latn_python-Code)": 2.39, - "ArguAna": 34.18, - "ArguAna-PL": 38.52, - "ArguAna-PL (pol-Latn)": 38.56, - "BSARDRetrieval": 0.0, - "BSARDRetrieval (fra-Latn)": 4.44, - "CQADupstackRetrieval": 18.75, - "ClimateFEVER": 3.83, - "CmedqaRetrieval (cmn-Hans)": 5.49, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 17.98, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 35.98, - "CodeSearchNetCCRetrieval (python-Code)": 40.41, - "CodeSearchNetCCRetrieval (javascript-Code)": 52.0, - "CodeSearchNetCCRetrieval (go-Code)": 41.73, - "CodeSearchNetCCRetrieval (ruby-Code)": 41.46, - "CodeSearchNetCCRetrieval (java-Code)": 45.83, - "CodeSearchNetCCRetrieval (php-Code)": 31.96, - "CodeSearchNetRetrieval (python-Code)": 60.52, - "CodeSearchNetRetrieval (javascript-Code)": 46.24, - "CodeSearchNetRetrieval (go-Code)": 48.56, - "CodeSearchNetRetrieval (ruby-Code)": 51.34, - "CodeSearchNetRetrieval (java-Code)": 35.05, - "CodeSearchNetRetrieval (php-Code)": 46.03, - "CodeTransOceanContest (python-Code_c++-Code)": 28.1, - "CodeTransOceanDL": 26.45, - "CosQA (eng-Latn_python-Code)": 8.77, - "CovidRetrieval (cmn-Hans)": 28.6, - "DBPedia": 15.57, - "DBPedia-PL": 16.1, - "DuRetrieval (cmn-Hans)": 26.34, - "EcomRetrieval (cmn-Hans)": 25.42, - "FEVER": 12.18, - "FiQA-PL": 7.63, - "FiQA-PL (pol-Latn)": 7.66, - "FiQA2018": 7.0, - "GerDaLIRSmall (deu-Latn)": 4.59, - "HellaSwag": 5.59, - "HotpotQA": 18.75, - "HotpotQA-PL": 19.72, - "LEMBNarrativeQARetrieval": 11.45, - "LEMBQMSumRetrieval": 14.07, - "LEMBSummScreenFDRetrieval": 40.52, - "LEMBWikimQARetrieval": 28.1, - "LeCaRDv2 (zho-Hans)": 24.68, - "LegalBenchConsumerContractsQA": 54.66, - "LegalBenchCorporateLobbying": 69.39, - "LegalQuAD (deu-Latn)": 16.64, - "LegalSummarization": 53.89, - "MMarcoRetrieval (cmn-Hans)": 34.78, - "MSMARCO": 7.6, - "MSMARCO-PL": 7.22, - "MedicalRetrieval (cmn-Hans)": 6.68, - "MintakaRetrieval (fr)": 15.53, - "MintakaRetrieval (ara-Arab)": 14.06, - "MintakaRetrieval (deu-Latn)": 15.26, - "MintakaRetrieval (spa-Latn)": 15.65, - "MintakaRetrieval (fra-Latn)": 15.53, - "MintakaRetrieval (hin-Deva)": 13.67, - "MintakaRetrieval (ita-Latn)": 15.94, - "MintakaRetrieval (jpn-Hira)": 12.8, - "MintakaRetrieval (por-Latn)": 15.03, - "NFCorpus": 16.54, - "NFCorpus-PL": 17.45, - "NFCorpus-PL (pol-Latn)": 17.45, - "NQ": 8.42, - "NQ-PL": 9.65, - "PIQA": 6.53, - "Quail": 1.91, - "Quora-PL": 74.96, - "QuoraRetrieval": 77.03, - "RARbCode": 2.31, - "RARbMath": 27.19, - "RiaNewsRetrieval (rus-Cyrl)": 42.75, - "RuBQRetrieval (rus-Cyrl)": 30.02, - "SCIDOCS": 5.63, - "SCIDOCS-PL": 7.48, - "SCIDOCS-PL (pol-Latn)": 7.47, - "SIQA": 1.07, - "SciFact": 38.2, - "SciFact-PL": 39.79, - "SciFact-PL (pol-Latn)": 39.79, - "SpartQA": 1.56, - "StackOverflowQA": 38.23, - "SyntecRetrieval": 55.31, - "SyntecRetrieval (fra-Latn)": 55.31, - "SyntheticText2SQL (eng-Latn_sql-Code)": 43.28, - "T2Retrieval (cmn-Hans)": 25.32, - "TRECCOVID": 16.34, - "TRECCOVID-PL": 18.45, - "TRECCOVID-PL (pol-Latn)": 18.46, - "TempReasonL1": 1.56, - "TempReasonL2Fact": 7.06, - "TempReasonL2Pure": 0.14, - "TempReasonL3Fact": 8.74, - "TempReasonL3Pure": 4.73, - "Touche2020": 4.88, - "VideoRetrieval (cmn-Hans)": 22.04, - "WinoGrande": 54.3, - "XPQARetrieval (fr)": 51.74, - "XPQARetrieval (ara-Arab_ara-Arab)": 35.19, - "XPQARetrieval (eng-Latn_ara-Arab)": 20.64, - "XPQARetrieval (ara-Arab_eng-Latn)": 32.47, - "XPQARetrieval (deu-Latn_deu-Latn)": 53.56, - "XPQARetrieval (eng-Latn_deu-Latn)": 24.31, - "XPQARetrieval (deu-Latn_eng-Latn)": 54.87, - "XPQARetrieval (spa-Latn_spa-Latn)": 44.49, - "XPQARetrieval (eng-Latn_spa-Latn)": 25.31, - "XPQARetrieval (spa-Latn_eng-Latn)": 43.4, - "XPQARetrieval (fra-Latn_fra-Latn)": 51.74, - "XPQARetrieval (eng-Latn_fra-Latn)": 21.29, - "XPQARetrieval (fra-Latn_eng-Latn)": 49.4, - "XPQARetrieval (hin-Deva_hin-Deva)": 66.64, - "XPQARetrieval (eng-Latn_hin-Deva)": 23.25, - "XPQARetrieval (hin-Deva_eng-Latn)": 64.54, - "XPQARetrieval (ita-Latn_ita-Latn)": 56.27, - "XPQARetrieval (eng-Latn_ita-Latn)": 25.8, - "XPQARetrieval (ita-Latn_eng-Latn)": 52.69, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 58.6, - "XPQARetrieval (eng-Latn_jpn-Hira)": 21.49, - "XPQARetrieval (jpn-Hira_eng-Latn)": 52.41, - "XPQARetrieval (kor-Hang_kor-Hang)": 27.63, - "XPQARetrieval (eng-Latn_kor-Hang)": 23.33, - "XPQARetrieval (kor-Hang_eng-Latn)": 23.97, - "XPQARetrieval (pol-Latn_pol-Latn)": 37.33, - "XPQARetrieval (eng-Latn_pol-Latn)": 16.19, - "XPQARetrieval (pol-Latn_eng-Latn)": 37.7, - "XPQARetrieval (por-Latn_por-Latn)": 38.48, - "XPQARetrieval (eng-Latn_por-Latn)": 19.41, - "XPQARetrieval (por-Latn_eng-Latn)": 37.31, - "XPQARetrieval (tam-Taml_tam-Taml)": 37.33, - "XPQARetrieval (eng-Latn_tam-Taml)": 20.53, - "XPQARetrieval (tam-Taml_eng-Latn)": 30.14, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 50.7, - "XPQARetrieval (eng-Latn_cmn-Hans)": 20.59, - "XPQARetrieval (cmn-Hans_eng-Latn)": 48.23 - }, + "Clustering": { + "v_measure": [ { - "Model": "LaBSE", - "LEMBNeedleRetrieval": 17.5, - "LEMBPasskeyRetrieval": 20.0 + "Model": "e5-small", + "BiorxivClusteringP2P": 36.1, + "BiorxivClusteringS2S": 31.51, + "MedrxivClusteringP2P": 31.31, + "MedrxivClusteringS2S": 28.32, + "RedditClustering": 43.27, + "RedditClusteringP2P": 57.22, + "StackExchangeClustering": 59.6, + "StackExchangeClusteringP2P": 30.82, + "TwentyNewsgroupsClustering": 37.65 } ] }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, "STS": { - "cosine_spearman": [ + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "electra-small-nordic": { + "BitextMining": { + "f1": [ { - "Model": "LaBSE", - "AFQMC (cmn-Hans)": 21.02, - "ATEC (cmn-Hans)": 26.61, - "BIOSSES": 78.7, - "BQ (cmn-Hans)": 42.6, - "CDSC-R (pol-Latn)": 85.53, - "LCQMC (cmn-Hans)": 52.19, - "PAWSX (cmn-Hans)": 10.23, - "RUParaPhraserSTS (rus-Cyrl)": 65.74, - "RuSTSBenchmarkSTS (rus-Cyrl)": 73.34, - "SICK-R": 69.99, - "SICK-R-PL (pol-Latn)": 65.9, - "SICKFr (fra-Latn)": 69.94, - "STS12": 65.08, - "STS13": 67.98, - "STS14": 64.03, - "STS15": 76.59, - "STS16": 72.98, - "STS17 (spa-Latn_eng-Latn)": 65.71, - "STS17 (eng-Latn_deu-Latn)": 73.85, - "STS17 (fra-Latn_eng-Latn)": 76.98, - "STS17 (eng-Latn_tur-Latn)": 72.07, - "STS17 (kor-Hang)": 71.32, - "STS17 (spa-Latn)": 80.83, - "STS17 (ita-Latn_eng-Latn)": 76.99, - "STS17 (ara-Arab)": 69.07, - "STS17 (en-en)": 79.45, - "STS17 (nld-Latn_eng-Latn)": 75.22, - "STS17 (eng-Latn_ara-Arab)": 74.51, - "STS22 (pol-Latn_eng-Latn)": 69.41, - "STS22 (deu-Latn_eng-Latn)": 50.14, - "STS22 (spa-Latn)": 63.18, - "STS22 (deu-Latn_pol-Latn)": 58.69, - "STS22 (fra-Latn)": 77.95, - "STS22 (fra-Latn_pol-Latn)": 61.98, - "STS22 (deu-Latn)": 48.58, - "STS22 (pol-Latn)": 39.3, - "STS22 (en)": 61.63, - "STS22 (spa-Latn_ita-Latn)": 69.69, - "STS22 (cmn-Hans_eng-Latn)": 64.02, - "STS22 (deu-Latn_fra-Latn)": 53.28, - "STS22 (tur-Latn)": 58.15, - "STS22 (ita-Latn)": 72.22, - "STS22 (rus-Cyrl)": 57.49, - "STS22 (spa-Latn_eng-Latn)": 71.86, - "STS22 (ara-Arab)": 57.67, - "STS22 (cmn-Hans)": 63.02, - "STSB (cmn-Hans)": 68.38, - "STSBenchmark": 72.25, - "STSBenchmarkMultilingualSTS (nld-Latn)": 70.22, - "STSBenchmarkMultilingualSTS (deu-Latn)": 72.43, - "STSBenchmarkMultilingualSTS (por-Latn)": 71.65, - "STSBenchmarkMultilingualSTS (fra-Latn)": 75.1, - "STSBenchmarkMultilingualSTS (en)": 72.25, - "STSBenchmarkMultilingualSTS (pol-Latn)": 72.58, - "STSBenchmarkMultilingualSTS (spa-Latn)": 72.92, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 69.5, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 73.06, - "STSBenchmarkMultilingualSTS (ita-Latn)": 72.97 - }, + "Model": "electra-small-nordic", + "BornholmBitextMining": 1.44 + } + ] + }, + "Classification": { + "accuracy": [ { - "Model": "LaBSE", - "BIOSSES": 78.7, - "CDSC-R": 85.53, - "SICK-R": 69.99, - "SICK-R-PL": 65.9, - "SICKFr": 69.94, - "STS12": 65.08, - "STS13": 67.98, - "STS14": 64.03, - "STS15": 76.59, - "STS16": 72.98, - "STS17 (ar-ar)": 69.07, - "STS17 (en-ar)": 74.51, - "STS17 (en-de)": 73.85, - "STS17 (en-en)": 79.45, - "STS17 (en-tr)": 72.07, - "STS17 (es-en)": 65.71, - "STS17 (es-es)": 80.83, - "STS17 (fr-en)": 76.98, - "STS17 (it-en)": 76.99, - "STS17 (ko-ko)": 71.32, - "STS17 (nl-en)": 75.22, - "STS22 (ar)": 57.67, - "STS22 (de)": 48.58, - "STS22 (de-en)": 50.14, - "STS22 (de-fr)": 53.28, - "STS22 (de-pl)": 58.69, - "STS22 (en)": 60.97, - "STS22 (es)": 63.18, - "STS22 (es-en)": 71.86, - "STS22 (es-it)": 69.69, - "STS22 (fr)": 77.95, - "STS22 (fr-pl)": 61.98, - "STS22 (it)": 72.22, - "STS22 (pl)": 39.28, - "STS22 (pl-en)": 69.41, - "STS22 (ru)": 57.49, - "STS22 (tr)": 58.15, - "STS22 (zh)": 63.02, - "STS22 (zh-en)": 64.02, - "STSBenchmark": 72.25, - "STSBenchmarkMultilingualSTS (fr)": 75.1 + "Model": "electra-small-nordic", + "AngryTweetsClassification": 47.91, + "DKHateClassification": 59.45, + "DanishPoliticalCommentsClassification": 31.89, + "LccSentimentClassification": 47.93, + "MassiveIntentClassification": 27.58, + "MassiveScenarioClassification": 29.93, + "NoRecClassification": 45.44, + "NordicLangClassification": 57.82, + "NorwegianParliament": 53.25, + "ScalaDaClassification": 70.41, + "ScalaNbClassification": 75.28 } ] }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, "Summarization": { - "cosine_spearman": [ - { - "Model": "LaBSE", - "SummEval": 31.05, - "SummEvalFr": 30.16 - }, + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "electra-small-swedish-cased-discriminator": { + "BitextMining": { + "f1": [ { - "Model": "LaBSE", - "SummEval": 31.05, - "SummEvalFr (fra-Latn)": 30.16 - }, + "Model": "electra-small-swedish-cased-discriminator", + "BornholmBitextMining": 0.85 + } + ] + }, + "Classification": { + "accuracy": [ { - "Model": "LaBSE", - "SummEval": 31.05, - "SummEvalFr (fra-Latn)": 30.16 + "Model": "electra-small-swedish-cased-discriminator", + "AngryTweetsClassification": 40.52, + "DKHateClassification": 52.28, + "DanishPoliticalCommentsClassification": 25.17, + "LccSentimentClassification": 36.67, + "MassiveIntentClassification": 6.6, + "MassiveScenarioClassification": 12.16, + "NoRecClassification": 39.72, + "NordicLangClassification": 44.53, + "NorwegianParliament": 52.44, + "ScalaDaClassification": 51.66, + "ScalaNbClassification": 52.41 } ] }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "LaBSE", - "CEDRClassification (rus-Cyrl)": 40.61, - "SensitiveTopicsClassification (rus-Cyrl)": 22.23 - } - ] + "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "LaBSE", - "Core17InstructionRetrieval": 1.49, - "News21InstructionRetrieval": -1.11, - "Robust04InstructionRetrieval": -9.37 - } - ] + "p-MRR": [] } }, - "text-similarity-ada-001": { + "elser-v2": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "text-similarity-ada-001", - "AmazonCounterfactualClassification (en)": 76.4, - "AmazonPolarityClassification": 92.83, - "AmazonReviewsClassification (en)": 47.45, - "Banking77Classification": 68.04, - "EmotionClassification": 50.33, - "ImdbClassification": 89.38, - "MTOPDomainClassification (en)": 89.89, - "MTOPIntentClassification (en)": 64.8, - "MassiveIntentClassification (en)": 65.17, - "MassiveScenarioClassification (en)": 67.67, - "ToxicConversationsClassification": 70.0, - "TweetSentimentExtractionClassification": 63.35 + "Model": "elser-v2", + "AmazonCounterfactualClassification": 74.16, + "AmazonPolarityClassification": 61.91, + "AmazonReviewsClassification": 32.06, + "Banking77Classification": 82.05, + "EmotionClassification": 46.65, + "ImdbClassification": 65.02, + "MTOPDomainClassification": 93.17, + "MTOPIntentClassification": 71.1, + "MassiveIntentClassification": 68.48, + "MassiveScenarioClassification": 74.98, + "ToxicConversationsClassification": 68.15, + "TweetSentimentExtractionClassification": 53.57 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-similarity-ada-001", - "ArxivClusteringP2P": 41.49, - "ArxivClusteringS2S": 28.47, - "BiorxivClusteringP2P": 36.86, - "BiorxivClusteringS2S": 27.55, - "MedrxivClusteringP2P": 31.09, - "MedrxivClusteringS2S": 26.5, - "RedditClustering": 42.47, - "RedditClusteringP2P": 58.1, - "StackExchangeClustering": 53.52, - "StackExchangeClusteringP2P": 30.43, - "TwentyNewsgroupsClustering": 36.26 + "Model": "elser-v2", + "ArxivClusteringP2P": 35.27, + "ArxivClusteringS2S": 23.18, + "BiorxivClusteringP2P": 31.13, + "BiorxivClusteringS2S": 26.78, + "MedrxivClusteringP2P": 24.65, + "MedrxivClusteringS2S": 24.21, + "RedditClustering": 38.74, + "RedditClusteringP2P": 51.92, + "StackExchangeClustering": 42.7, + "StackExchangeClusteringP2P": 28.7, + "TwentyNewsgroupsClustering": 27.82 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-similarity-ada-001", - "SprintDuplicateQuestions": 77.85, - "TwitterSemEval2015": 69.04, - "TwitterURLCorpus": 83.69 + "Model": "elser-v2", + "SprintDuplicateQuestions": 94.53, + "TwitterSemEval2015": 64.41, + "TwitterURLCorpus": 85.01 }, { - "Model": "text-similarity-ada-001", - "SprintDuplicateQuestions": 78.07, - "TwitterSemEval2015": 69.04, - "TwitterURLCorpus": 83.69 + "Model": "elser-v2", + "SprintDuplicateQuestions": 94.53, + "TwitterSemEval2015": 64.41, + "TwitterURLCorpus": 85.01 } ] }, "Reranking": { "map": [ { - "Model": "text-similarity-ada-001", - "AskUbuntuDupQuestions": 53.49, - "MindSmallReranking": 30.71, - "SciDocsRR": 71.04, - "StackOverflowDupQuestions": 40.85 + "Model": "elser-v2", + "AskUbuntuDupQuestions": 58.31, + "MindSmallReranking": 30.75, + "SciDocsRR": 75.62, + "StackOverflowDupQuestions": 48.4 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-similarity-ada-001", - "ArguAna": 39.65, - "CQADupstackRetrieval": 10.17, - "ClimateFEVER": 2.83, - "DBPedia": 3.48, - "FEVER": 4.45, - "FiQA2018": 7.54, - "HotpotQA": 12.6, - "MSMARCO": 10.53, - "NFCorpus": 20.59, - "NQ": 2.02, - "QuoraRetrieval": 82.18, - "SCIDOCS": 6.28, - "SciFact": 45.46, - "TRECCOVID": 24.56, - "Touche2020": 3.1 + "Model": "elser-v2", + "ArguAna": 55.98, + "CQADupstackRetrieval": 34.27, + "ClimateFEVER": 27.08, + "DBPedia": 42.7, + "FEVER": 78.55, + "FiQA2018": 41.57, + "HotpotQA": 67.01, + "MSMARCO": 38.9, + "NFCorpus": 36.66, + "NQ": 55.84, + "QuoraRetrieval": 84.69, + "SCIDOCS": 16.24, + "SciFact": 71.8, + "TRECCOVID": 72.72, + "Touche2020": 26.27 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text-similarity-ada-001", - "BIOSSES": 78.04, - "SICK-R": 77.48, - "STS12": 72.3, - "STS13": 81.49, - "STS14": 74.74, - "STS15": 84.28, - "STS16": 82.06, - "STS17 (en-en)": 87.08, - "STS22 (en)": 64.71, - "STSBenchmark": 83.78 + "Model": "elser-v2", + "BIOSSES": 83.79, + "SICK-R": 68.78, + "STS12": 64.81, + "STS13": 80.1, + "STS14": 74.96, + "STS15": 83.7, + "STS16": 80.55, + "STS17": 85.74, + "STS22": 67.5, + "STSBenchmark": 79.54 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "text-similarity-ada-001", - "SummEval": 26.94 + "Model": "elser-v2", + "SummEval": 31.03 } ] }, @@ -9937,91 +9750,165 @@ "p-MRR": [] } }, - "xlm-roberta-large": { + "flan-t5-base": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "flan-t5-base", + "Core17InstructionRetrieval": -3.31, + "News21InstructionRetrieval": -0.12, + "Robust04InstructionRetrieval": 5.35 + } + ] + } + }, + "flan-t5-large": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "flan-t5-large", + "Core17InstructionRetrieval": 1.32, + "News21InstructionRetrieval": 8.95, + "Robust04InstructionRetrieval": 3.9 + } + ] + } + }, + "flaubert_base_cased": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "xlm-roberta-large", - "AmazonReviewsClassification (fr)": 26.62, - "MTOPDomainClassification (fr)": 36.77, - "MTOPIntentClassification (fr)": 15.37, - "MasakhaNEWSClassification (fra)": 65.76, - "MassiveIntentClassification (fr)": 15.82, - "MassiveScenarioClassification (fr)": 23.92 + "Model": "flaubert_base_cased", + "AmazonReviewsClassification": 24.9, + "MTOPDomainClassification": 25.55, + "MTOPIntentClassification": 9.49, + "MasakhaNEWSClassification": 71.14, + "MassiveIntentClassification": 6.98, + "MassiveScenarioClassification": 11.41 } ] }, "Clustering": { "v_measure": [ { - "Model": "xlm-roberta-large", - "AlloProfClusteringP2P": 56.54, - "AlloProfClusteringS2S": 21.18, - "BlurbsClusteringP2P": 29.84, - "BlurbsClusteringS2S": 7.29, - "HALClusteringS2S": 5.94, - "MLSUMClusteringP2P": 42.67, - "MLSUMClusteringS2S": 18.5, - "MasakhaNEWSClusteringP2P (fra)": 34.02, - "MasakhaNEWSClusteringS2S (fra)": 21.52, - "TenKGnadClusteringP2P": 32.46, - "TenKGnadClusteringS2S": 6.16 + "Model": "flaubert_base_cased", + "AlloProfClusteringP2P": 52.86, + "AlloProfClusteringS2S": 14.46, + "HALClusteringS2S": 3.85, + "MLSUMClusteringP2P": 39.06, + "MLSUMClusteringS2S": 17.13, + "MasakhaNEWSClusteringP2P": 41.61, + "MasakhaNEWSClusteringS2S": 21.26 } ] }, "PairClassification": { "max_ap": [ { - "Model": "xlm-roberta-large", - "OpusparcusPC (fr)": 83.73, - "PawsXPairClassification (fr)": 53.38 + "Model": "flaubert_base_cased", + "OpusparcusPC": 82.15, + "PawsXPairClassification": 51.89 }, { - "Model": "xlm-roberta-large", - "OpusparcusPC (fr)": 83.79, - "PawsXPairClassification (fr)": 53.44 + "Model": "flaubert_base_cased", + "OpusparcusPC": 82.15, + "PawsXPairClassification": 52.19 } ] }, "Reranking": { "map": [ { - "Model": "xlm-roberta-large", - "AlloprofReranking": 28.62, - "SyntecReranking": 49.4 + "Model": "flaubert_base_cased", + "AlloprofReranking": 34.81, + "SyntecReranking": 55.88 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "xlm-roberta-large", - "AlloprofRetrieval": 0.52, + "Model": "flaubert_base_cased", + "AlloprofRetrieval": 1.63, "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 0.9, - "SyntecRetrieval": 6.6, - "XPQARetrieval (fr)": 12.7 + "MintakaRetrieval": 0.58, + "SyntecRetrieval": 20.56, + "XPQARetrieval": 6.59 } ] }, "STS": { "cosine_spearman": [ { - "Model": "xlm-roberta-large", - "SICKFr": 50.01, - "STS22 (fr)": 55.49, - "STSBenchmarkMultilingualSTS (fr)": 42.32 + "Model": "flaubert_base_cased", + "SICKFr": 53.86, + "STS22": 65.37, + "STSBenchmarkMultilingualSTS": 37.14 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "xlm-roberta-large", - "SummEvalFr": 28.89 + "Model": "flaubert_base_cased", + "SummEvalFr": 31.26 } ] }, @@ -10032,49 +9919,89 @@ "p-MRR": [] } }, - "bge-small-en-v1.5": { + "flaubert_base_uncased": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "flaubert_base_uncased", + "AmazonReviewsClassification": 23.52, + "MTOPDomainClassification": 27.74, + "MTOPIntentClassification": 8.61, + "MasakhaNEWSClassification": 62.61, + "MassiveIntentClassification": 6.24, + "MassiveScenarioClassification": 10.98 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "flaubert_base_uncased", + "AlloProfClusteringP2P": 43.2, + "AlloProfClusteringS2S": 12.94, + "HALClusteringS2S": 1.8, + "MLSUMClusteringP2P": 33.22, + "MLSUMClusteringS2S": 14.9, + "MasakhaNEWSClusteringP2P": 28.49, + "MasakhaNEWSClusteringS2S": 22.58 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "flaubert_base_uncased", + "OpusparcusPC": 82.0, + "PawsXPairClassification": 52.78 + }, + { + "Model": "flaubert_base_uncased", + "OpusparcusPC": 82.0, + "PawsXPairClassification": 52.89 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "flaubert_base_uncased", + "AlloprofReranking": 34.55, + "SyntecReranking": 57.18 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-small-en-v1.5", - "ARCChallenge": 8.95, - "AlphaNLI": 11.64, - "HellaSwag": 25.44, - "PIQA": 23.92, - "Quail": 1.75, - "RARbCode": 42.36, - "RARbMath": 44.98, - "SIQA": 0.77, - "SpartQA": 3.55, - "TempReasonL1": 1.41, - "TempReasonL2Fact": 17.56, - "TempReasonL2Pure": 1.05, - "TempReasonL3Fact": 13.88, - "TempReasonL3Pure": 4.76, - "WinoGrande": 10.28 + "Model": "flaubert_base_uncased", + "AlloprofRetrieval": 1.72, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 0.51, + "SyntecRetrieval": 22.33, + "XPQARetrieval": 9.09 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "flaubert_base_uncased", + "SICKFr": 41.9, + "STS22": 55.15, + "STSBenchmarkMultilingualSTS": 33.41 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "flaubert_base_uncased", + "SummEvalFr": 29.43 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -10083,447 +10010,130 @@ "p-MRR": [] } }, - "monot5-3b-msmarco-10k": { + "flaubert_large_cased": { "BitextMining": { "f1": [] }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "monot5-3b-msmarco-10k", - "Core17InstructionRetrieval": 1.84, - "News21InstructionRetrieval": 1.78, - "Robust04InstructionRetrieval": 3.96 - } - ] - } - }, - "LASER2": { - "BitextMining": { - "f1": [ - { - "Model": "LASER2", - "BUCC (de-en)": 99.21, - "BUCC (fr-en)": 98.39, - "BUCC (ru-en)": 97.62, - "BUCC (zh-en)": 97.7, - "Tatoeba (afr-eng)": 92.59, - "Tatoeba (amh-eng)": 80.82, - "Tatoeba (ang-eng)": 25.22, - "Tatoeba (ara-eng)": 90.14, - "Tatoeba (arq-eng)": 26.63, - "Tatoeba (arz-eng)": 66.16, - "Tatoeba (ast-eng)": 76.35, - "Tatoeba (awa-eng)": 33.74, - "Tatoeba (aze-eng)": 82.41, - "Tatoeba (bel-eng)": 79.54, - "Tatoeba (ben-eng)": 89.43, - "Tatoeba (ber-eng)": 77.63, - "Tatoeba (bos-eng)": 95.86, - "Tatoeba (bre-eng)": 31.2, - "Tatoeba (bul-eng)": 93.57, - "Tatoeba (cat-eng)": 95.8, - "Tatoeba (cbk-eng)": 77.17, - "Tatoeba (ceb-eng)": 9.93, - "Tatoeba (ces-eng)": 95.52, - "Tatoeba (cha-eng)": 14.86, - "Tatoeba (cmn-eng)": 85.62, - "Tatoeba (cor-eng)": 4.45, - "Tatoeba (csb-eng)": 27.03, - "Tatoeba (cym-eng)": 5.85, - "Tatoeba (dan-eng)": 95.22, - "Tatoeba (deu-eng)": 99.07, - "Tatoeba (dsb-eng)": 42.34, - "Tatoeba (dtp-eng)": 7.39, - "Tatoeba (ell-eng)": 96.2, - "Tatoeba (epo-eng)": 96.61, - "Tatoeba (est-eng)": 96.43, - "Tatoeba (eus-eng)": 93.32, - "Tatoeba (fao-eng)": 57.04, - "Tatoeba (fin-eng)": 96.98, - "Tatoeba (fra-eng)": 94.28, - "Tatoeba (fry-eng)": 42.07, - "Tatoeba (gla-eng)": 1.52, - "Tatoeba (gle-eng)": 4.2, - "Tatoeba (glg-eng)": 96.14, - "Tatoeba (gsw-eng)": 27.52, - "Tatoeba (heb-eng)": 0.0, - "Tatoeba (hin-eng)": 95.32, - "Tatoeba (hrv-eng)": 96.72, - "Tatoeba (hsb-eng)": 45.75, - "Tatoeba (hun-eng)": 95.2, - "Tatoeba (hye-eng)": 88.72, - "Tatoeba (ido-eng)": 80.86, - "Tatoeba (ile-eng)": 87.88, - "Tatoeba (ina-eng)": 93.93, - "Tatoeba (ind-eng)": 92.98, - "Tatoeba (isl-eng)": 94.32, - "Tatoeba (ita-eng)": 94.32, - "Tatoeba (jav-eng)": 9.95, - "Tatoeba (jpn-eng)": 93.78, - "Tatoeba (kab-eng)": 65.88, - "Tatoeba (kat-eng)": 81.16, - "Tatoeba (kaz-eng)": 53.3, - "Tatoeba (khm-eng)": 74.19, - "Tatoeba (kor-eng)": 87.97, - "Tatoeba (kur-eng)": 19.09, - "Tatoeba (kzj-eng)": 4.46, - "Tatoeba (lat-eng)": 64.81, - "Tatoeba (lfn-eng)": 63.39, - "Tatoeba (lit-eng)": 96.2, - "Tatoeba (lvs-eng)": 95.33, - "Tatoeba (mal-eng)": 98.16, - "Tatoeba (mar-eng)": 92.93, - "Tatoeba (max-eng)": 36.96, - "Tatoeba (mhr-eng)": 6.86, - "Tatoeba (mkd-eng)": 93.63, - "Tatoeba (mon-eng)": 3.42, - "Tatoeba (nds-eng)": 77.13, - "Tatoeba (nld-eng)": 95.35, - "Tatoeba (nno-eng)": 72.75, - "Tatoeba (nob-eng)": 95.77, - "Tatoeba (nov-eng)": 60.02, - "Tatoeba (oci-eng)": 58.13, - "Tatoeba (orv-eng)": 23.24, - "Tatoeba (pam-eng)": 3.24, - "Tatoeba (pes-eng)": 93.13, - "Tatoeba (pms-eng)": 36.23, - "Tatoeba (pol-eng)": 97.32, - "Tatoeba (por-eng)": 94.54, - "Tatoeba (ron-eng)": 96.52, - "Tatoeba (rus-eng)": 92.58, - "Tatoeba (slk-eng)": 95.82, - "Tatoeba (slv-eng)": 95.4, - "Tatoeba (spa-eng)": 97.33, - "Tatoeba (sqi-eng)": 97.22, - "Tatoeba (srp-eng)": 93.64, - "Tatoeba (swe-eng)": 95.31, - "Tatoeba (swg-eng)": 33.1, - "Tatoeba (swh-eng)": 55.66, - "Tatoeba (tam-eng)": 87.32, - "Tatoeba (tat-eng)": 34.74, - "Tatoeba (tel-eng)": 96.72, - "Tatoeba (tgl-eng)": 63.19, - "Tatoeba (tha-eng)": 96.38, - "Tatoeba (tuk-eng)": 16.35, - "Tatoeba (tur-eng)": 98.03, - "Tatoeba (tzl-eng)": 36.56, - "Tatoeba (uig-eng)": 56.49, - "Tatoeba (ukr-eng)": 93.52, - "Tatoeba (urd-eng)": 84.23, - "Tatoeba (uzb-eng)": 23.2, - "Tatoeba (vie-eng)": 96.73, - "Tatoeba (war-eng)": 8.25, - "Tatoeba (wuu-eng)": 75.09, - "Tatoeba (xho-eng)": 4.68, - "Tatoeba (yid-eng)": 2.49, - "Tatoeba (yue-eng)": 87.75, - "Tatoeba (zsm-eng)": 95.41 - } - ] - }, "Classification": { "accuracy": [ { - "Model": "LASER2", - "AmazonCounterfactualClassification (de)": 67.82, - "AmazonCounterfactualClassification (en)": 76.84, - "AmazonCounterfactualClassification (en-ext)": 76.17, - "AmazonCounterfactualClassification (ja)": 68.76, - "AmazonPolarityClassification": 61.01, - "AmazonReviewsClassification (de)": 31.07, - "AmazonReviewsClassification (en)": 28.71, - "AmazonReviewsClassification (es)": 32.72, - "AmazonReviewsClassification (fr)": 31.12, - "AmazonReviewsClassification (ja)": 28.94, - "AmazonReviewsClassification (zh)": 30.89, - "Banking77Classification": 57.76, - "EmotionClassification": 24.83, - "ImdbClassification": 57.58, - "MTOPDomainClassification (de)": 74.08, - "MTOPDomainClassification (en)": 75.36, - "MTOPDomainClassification (es)": 73.47, - "MTOPDomainClassification (fr)": 72.26, - "MTOPDomainClassification (hi)": 72.95, - "MTOPDomainClassification (th)": 72.68, - "MTOPIntentClassification (de)": 51.62, - "MTOPIntentClassification (en)": 49.47, - "MTOPIntentClassification (es)": 52.75, - "MTOPIntentClassification (fr)": 50.12, - "MTOPIntentClassification (hi)": 45.55, - "MTOPIntentClassification (th)": 50.07, - "MasakhaNEWSClassification (fra)": 65.9, - "MassiveIntentClassification (af)": 38.01, - "MassiveIntentClassification (am)": 12.7, - "MassiveIntentClassification (ar)": 37.16, - "MassiveIntentClassification (az)": 19.98, - "MassiveIntentClassification (bn)": 42.51, - "MassiveIntentClassification (cy)": 17.33, - "MassiveIntentClassification (da)": 45.61, - "MassiveIntentClassification (de)": 44.79, - "MassiveIntentClassification (el)": 46.71, - "MassiveIntentClassification (en)": 47.91, - "MassiveIntentClassification (es)": 45.44, - "MassiveIntentClassification (fa)": 45.01, - "MassiveIntentClassification (fi)": 45.94, - "MassiveIntentClassification (fr)": 46.13, - "MassiveIntentClassification (he)": 42.55, - "MassiveIntentClassification (hi)": 40.2, - "MassiveIntentClassification (hu)": 42.77, - "MassiveIntentClassification (hy)": 28.07, - "MassiveIntentClassification (id)": 45.81, - "MassiveIntentClassification (is)": 39.86, - "MassiveIntentClassification (it)": 48.25, - "MassiveIntentClassification (ja)": 45.3, - "MassiveIntentClassification (jv)": 24.3, - "MassiveIntentClassification (ka)": 22.7, - "MassiveIntentClassification (km)": 22.48, - "MassiveIntentClassification (kn)": 4.32, - "MassiveIntentClassification (ko)": 44.26, - "MassiveIntentClassification (lv)": 39.75, - "MassiveIntentClassification (ml)": 41.33, - "MassiveIntentClassification (mn)": 16.2, - "MassiveIntentClassification (ms)": 43.23, - "MassiveIntentClassification (my)": 25.37, - "MassiveIntentClassification (nb)": 37.74, - "MassiveIntentClassification (nl)": 45.0, - "MassiveIntentClassification (pl)": 44.99, - "MassiveIntentClassification (pt)": 48.55, - "MassiveIntentClassification (ro)": 44.3, - "MassiveIntentClassification (ru)": 44.29, - "MassiveIntentClassification (sl)": 44.72, - "MassiveIntentClassification (sq)": 46.12, - "MassiveIntentClassification (sv)": 45.95, - "MassiveIntentClassification (sw)": 31.89, - "MassiveIntentClassification (ta)": 29.63, - "MassiveIntentClassification (te)": 36.03, - "MassiveIntentClassification (th)": 43.39, - "MassiveIntentClassification (tl)": 29.73, - "MassiveIntentClassification (tr)": 43.93, - "MassiveIntentClassification (ur)": 26.11, - "MassiveIntentClassification (vi)": 44.33, - "MassiveIntentClassification (zh-CN)": 40.62, - "MassiveIntentClassification (zh-TW)": 32.93, - "MassiveScenarioClassification (af)": 47.1, - "MassiveScenarioClassification (am)": 17.7, - "MassiveScenarioClassification (ar)": 45.21, - "MassiveScenarioClassification (az)": 28.21, - "MassiveScenarioClassification (bn)": 50.52, - "MassiveScenarioClassification (cy)": 22.58, - "MassiveScenarioClassification (da)": 54.87, - "MassiveScenarioClassification (de)": 54.34, - "MassiveScenarioClassification (el)": 55.47, - "MassiveScenarioClassification (en)": 55.92, - "MassiveScenarioClassification (es)": 52.77, - "MassiveScenarioClassification (fa)": 52.5, - "MassiveScenarioClassification (fi)": 52.63, - "MassiveScenarioClassification (fr)": 54.32, - "MassiveScenarioClassification (he)": 52.41, - "MassiveScenarioClassification (hi)": 47.37, - "MassiveScenarioClassification (hu)": 53.43, - "MassiveScenarioClassification (hy)": 33.57, - "MassiveScenarioClassification (id)": 54.38, - "MassiveScenarioClassification (is)": 49.78, - "MassiveScenarioClassification (it)": 54.84, - "MassiveScenarioClassification (ja)": 54.12, - "MassiveScenarioClassification (jv)": 32.71, - "MassiveScenarioClassification (ka)": 26.92, - "MassiveScenarioClassification (km)": 27.23, - "MassiveScenarioClassification (kn)": 10.06, - "MassiveScenarioClassification (ko)": 52.01, - "MassiveScenarioClassification (lv)": 44.82, - "MassiveScenarioClassification (ml)": 49.1, - "MassiveScenarioClassification (mn)": 21.51, - "MassiveScenarioClassification (ms)": 53.6, - "MassiveScenarioClassification (my)": 29.72, - "MassiveScenarioClassification (nb)": 43.9, - "MassiveScenarioClassification (nl)": 53.33, - "MassiveScenarioClassification (pl)": 52.92, - "MassiveScenarioClassification (pt)": 53.41, - "MassiveScenarioClassification (ro)": 50.48, - "MassiveScenarioClassification (ru)": 51.84, - "MassiveScenarioClassification (sl)": 51.29, - "MassiveScenarioClassification (sq)": 55.65, - "MassiveScenarioClassification (sv)": 54.64, - "MassiveScenarioClassification (sw)": 42.04, - "MassiveScenarioClassification (ta)": 36.72, - "MassiveScenarioClassification (te)": 42.08, - "MassiveScenarioClassification (th)": 52.15, - "MassiveScenarioClassification (tl)": 37.34, - "MassiveScenarioClassification (tr)": 52.56, - "MassiveScenarioClassification (ur)": 32.6, - "MassiveScenarioClassification (vi)": 50.97, - "MassiveScenarioClassification (zh-CN)": 50.22, - "MassiveScenarioClassification (zh-TW)": 42.32, - "ToxicConversationsClassification": 54.05, - "TweetSentimentExtractionClassification": 48.73 + "Model": "flaubert_large_cased", + "AmazonReviewsClassification": 22.45, + "MTOPDomainClassification": 24.27, + "MTOPIntentClassification": 9.79, + "MasakhaNEWSClassification": 55.64, + "MassiveIntentClassification": 16.41, + "MassiveScenarioClassification": 22.72 } ] }, "Clustering": { "v_measure": [ { - "Model": "LASER2", - "AlloProfClusteringP2P": 48.45, - "AlloProfClusteringS2S": 25.81, - "ArxivClusteringP2P": 17.77, - "ArxivClusteringS2S": 12.39, - "BiorxivClusteringP2P": 12.4, - "BiorxivClusteringS2S": 8.83, - "HALClusteringS2S": 11.52, - "MLSUMClusteringP2P": 34.53, - "MLSUMClusteringS2S": 27.35, - "MasakhaNEWSClusteringP2P (fra)": 32.04, - "MasakhaNEWSClusteringS2S (fra)": 29.77, - "MedrxivClusteringP2P": 17.91, - "MedrxivClusteringS2S": 16.63, - "RedditClustering": 9.96, - "RedditClusteringP2P": 26.42, - "StackExchangeClustering": 15.79, - "StackExchangeClusteringP2P": 18.63, - "TwentyNewsgroupsClustering": 11.38 + "Model": "flaubert_large_cased", + "AlloProfClusteringP2P": 40.85, + "AlloProfClusteringS2S": 21.76, + "HALClusteringS2S": 5.26, + "MLSUMClusteringP2P": 38.09, + "MLSUMClusteringS2S": 18.71, + "MasakhaNEWSClusteringP2P": 26.43, + "MasakhaNEWSClusteringS2S": 24.68 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LASER2", - "OpusparcusPC (fr)": 93.77, - "PawsXPairClassification (fr)": 69.53, - "SprintDuplicateQuestions": 65.54, - "TwitterSemEval2015": 59.57, - "TwitterURLCorpus": 81.47 + "Model": "flaubert_large_cased", + "OpusparcusPC": 74.78, + "PawsXPairClassification": 54.14 }, { - "Model": "LASER2", - "OpusparcusPC (fr)": 93.77, - "PawsXPairClassification (fr)": 70.31, - "SprintDuplicateQuestions": 68.48, - "TwitterSemEval2015": 59.57, - "TwitterURLCorpus": 81.47 + "Model": "flaubert_large_cased", + "OpusparcusPC": 85.91, + "PawsXPairClassification": 54.94 } ] }, "Reranking": { "map": [ { - "Model": "LASER2", - "AlloprofReranking": 35.29, - "AskUbuntuDupQuestions": 48.99, - "MindSmallReranking": 24.79, - "SciDocsRR": 54.99, - "StackOverflowDupQuestions": 36.98, - "SyntecReranking": 55.93 + "Model": "flaubert_large_cased", + "AlloprofReranking": 26.29, + "SyntecReranking": 42.8 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LASER2", - "AlloprofRetrieval": 3.1, - "ArguAna": 12.86, - "BSARDRetrieval": 0.36, - "CQADupstackRetrieval": 4.12, - "ClimateFEVER": 0.36, - "DBPedia": 1.53, - "FEVER": 0.77, - "FiQA2018": 1.73, - "HotpotQA": 5.5, - "MSMARCO": 1.09, - "MintakaRetrieval (fr)": 6.31, - "NFCorpus": 2.44, - "NQ": 0.64, - "QuoraRetrieval": 71.14, - "SCIDOCS": 0.78, - "SciFact": 4.04, - "SyntecRetrieval": 28.58, - "TRECCOVID": 10.97, - "Touche2020": 1.06, - "XPQARetrieval (fr)": 42.59 + "Model": "flaubert_large_cased", + "AlloprofRetrieval": 0.58, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 0.26, + "SyntecRetrieval": 1.58, + "XPQARetrieval": 3.69 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LASER2", - "BIOSSES": 62.01, - "SICK-R": 62.86, - "SICKFr": 64.95, - "STS12": 62.6, - "STS13": 59.62, - "STS14": 57.03, - "STS15": 71.57, - "STS16": 70.75, - "STS17 (ar-ar)": 67.47, - "STS17 (en-ar)": 65.05, - "STS17 (en-de)": 66.66, - "STS17 (en-en)": 76.73, - "STS17 (en-tr)": 70.05, - "STS17 (es-en)": 55.3, - "STS17 (es-es)": 79.67, - "STS17 (fr-en)": 70.82, - "STS17 (it-en)": 70.98, - "STS17 (ko-ko)": 70.52, - "STS17 (nl-en)": 68.12, - "STS22 (ar)": 42.57, - "STS22 (de)": 25.69, - "STS22 (de-en)": 32.35, - "STS22 (de-fr)": 37.41, - "STS22 (de-pl)": 15.67, - "STS22 (en)": 39.76, - "STS22 (es)": 54.92, - "STS22 (es-en)": 54.34, - "STS22 (es-it)": 42.21, - "STS22 (fr)": 58.61, - "STS22 (fr-pl)": 39.44, - "STS22 (it)": 60.31, - "STS22 (pl)": 18.34, - "STS22 (pl-en)": 53.63, - "STS22 (ru)": 39.24, - "STS22 (tr)": 36.97, - "STS22 (zh)": 49.41, - "STS22 (zh-en)": 46.19, - "STSBenchmark": 69.77, - "STSBenchmarkMultilingualSTS (fr)": 69.82 + "Model": "flaubert_large_cased", + "SICKFr": 34.6, + "STS22": 48.52, + "STSBenchmarkMultilingualSTS": 15.66 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "LASER2", - "SummEval": 26.8, - "SummEvalFr": 31.56 + "Model": "flaubert_large_cased", + "SummEvalFr": 29.25 + } + ] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "gbert-base": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [ + { + "Model": "gbert-base", + "BlurbsClusteringP2P": 35.36, + "BlurbsClusteringS2S": 11.27, + "TenKGnadClusteringP2P": 37.16, + "TenKGnadClusteringS2S": 24.23 } ] }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, "MultilabelClassification": { "accuracy": [] }, @@ -10571,89 +10181,38 @@ "p-MRR": [] } }, - "sentence-camembert-large": { + "gelectra-base": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "sentence-camembert-large", - "AmazonReviewsClassification (fr)": 37.97, - "MTOPDomainClassification (fr)": 85.74, - "MTOPIntentClassification (fr)": 58.62, - "MasakhaNEWSClassification (fra)": 80.62, - "MassiveIntentClassification (fr)": 62.65, - "MassiveScenarioClassification (fr)": 69.29 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [ { - "Model": "sentence-camembert-large", - "AlloProfClusteringP2P": 62.69, - "AlloProfClusteringS2S": 42.06, - "HALClusteringS2S": 23.9, - "MLSUMClusteringP2P": 42.04, - "MLSUMClusteringS2S": 32.29, - "MasakhaNEWSClusteringP2P (fra)": 54.51, - "MasakhaNEWSClusteringS2S (fra)": 44.73 + "Model": "gelectra-base", + "BlurbsClusteringP2P": 10.06, + "BlurbsClusteringS2S": 7.74, + "TenKGnadClusteringP2P": 9.02, + "TenKGnadClusteringS2S": 4.11 } ] }, "PairClassification": { - "max_ap": [ - { - "Model": "sentence-camembert-large", - "OpusparcusPC (fr)": 94.63, - "PawsXPairClassification (fr)": 59.59 - }, - { - "Model": "sentence-camembert-large", - "OpusparcusPC (fr)": 94.63, - "PawsXPairClassification (fr)": 59.61 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "sentence-camembert-large", - "AlloprofReranking": 57.62, - "SyntecReranking": 88.15 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "sentence-camembert-large", - "AlloprofRetrieval": 31.62, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 21.87, - "SyntecRetrieval": 81.11, - "XPQARetrieval (fr)": 65.62 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "sentence-camembert-large", - "SICKFr": 77.7, - "STS22 (fr)": 81.73, - "STSBenchmarkMultilingualSTS (fr)": 85.79 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "sentence-camembert-large", - "SummEvalFr": 30.88 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -10662,7 +10221,7 @@ "p-MRR": [] } }, - "bge-small-en-v1.5-instruct": { + "gelectra-large": { "BitextMining": { "f1": [] }, @@ -10670,7 +10229,15 @@ "accuracy": [] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "gelectra-large", + "BlurbsClusteringP2P": 13.96, + "BlurbsClusteringS2S": 7.57, + "TenKGnadClusteringP2P": 11.49, + "TenKGnadClusteringS2S": 3.91 + } + ] }, "PairClassification": { "max_ap": [] @@ -10678,33 +10245,141 @@ "Reranking": { "map": [] }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "glove.6B.300d": { + "BitextMining": { + "f1": [ + { + "Model": "glove.6B.300d", + "BUCC": 0.0 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "glove.6B.300d", + "AmazonCounterfactualClassification": 56.91, + "AmazonPolarityClassification": 60.32, + "AmazonReviewsClassification": 29.67, + "Banking77Classification": 67.69, + "EmotionClassification": 36.93, + "ImdbClassification": 62.57, + "MTOPDomainClassification": 79.11, + "MTOPIntentClassification": 55.85, + "MassiveIntentClassification": 56.19, + "MassiveScenarioClassification": 66.03, + "ToxicConversationsClassification": 65.4, + "TweetSentimentExtractionClassification": 50.8 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "glove.6B.300d", + "ArxivClusteringP2P": 32.56, + "ArxivClusteringS2S": 23.14, + "BiorxivClusteringP2P": 29.27, + "BiorxivClusteringS2S": 19.18, + "MedrxivClusteringP2P": 26.12, + "MedrxivClusteringS2S": 20.38, + "RedditClustering": 28.46, + "RedditClusteringP2P": 35.82, + "StackExchangeClustering": 35.8, + "StackExchangeClusteringP2P": 28.51, + "TwentyNewsgroupsClustering": 25.83 + } + ] + }, + "PairClassification": { + "max_ap": [ + { + "Model": "glove.6B.300d", + "SprintDuplicateQuestions": 86.96, + "TwitterSemEval2015": 48.45, + "TwitterURLCorpus": 77.35 + }, + { + "Model": "glove.6B.300d", + "SprintDuplicateQuestions": 86.96, + "TwitterSemEval2015": 53.12, + "TwitterURLCorpus": 77.35 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "glove.6B.300d", + "AskUbuntuDupQuestions": 49.57, + "MindSmallReranking": 27.01, + "SciDocsRR": 62.56, + "StackOverflowDupQuestions": 34.03 + } + ] + }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-small-en-v1.5-instruct", - "ARCChallenge": 7.72, - "AlphaNLI": 1.26, - "HellaSwag": 23.41, - "PIQA": 20.79, - "Quail": 2.01, - "RARbCode": 41.52, - "RARbMath": 46.5, - "SIQA": 0.98, - "SpartQA": 2.86, - "TempReasonL1": 1.27, - "TempReasonL2Fact": 16.72, - "TempReasonL2Pure": 1.1, - "TempReasonL3Fact": 12.81, - "TempReasonL3Pure": 4.63, - "WinoGrande": 5.35 + "Model": "glove.6B.300d", + "ArguAna": 36.3, + "CQADupstackRetrieval": 15.47, + "ClimateFEVER": 14.44, + "DBPedia": 18.28, + "FEVER": 14.99, + "FiQA2018": 10.09, + "HotpotQA": 19.18, + "MSMARCO": 9.6, + "NFCorpus": 13.87, + "NQ": 12.87, + "QuoraRetrieval": 71.32, + "SCIDOCS": 8.04, + "SciFact": 29.58, + "TRECCOVID": 36.22, + "Touche2020": 13.99 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "glove.6B.300d", + "BIOSSES": 44.93, + "SICK-R": 55.43, + "STS12": 54.64, + "STS13": 69.16, + "STS14": 60.81, + "STS15": 72.31, + "STS16": 65.34, + "STS17": 77.95, + "STS22": 56.35, + "STSBenchmark": 61.54 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "glove.6B.300d", + "SummEval": 28.87 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -10713,198 +10388,118 @@ "p-MRR": [] } }, - "text-embedding-ada-002": { + "google-gecko-256.text-embedding-preview-0409": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "text-embedding-ada-002", - "AmazonCounterfactualClassification (en)": 75.94, - "AmazonPolarityClassification": 86.72, - "AmazonReviewsClassification (zh)": 38.3, - "AmazonReviewsClassification (en)": 44.78, - "AmazonReviewsClassification (fr)": 43.76, - "Banking77Classification": 80.66, - "EmotionClassification": 48.74, - "IFlyTek": 44.62, - "ImdbClassification": 77.98, - "JDReview": 74.6, - "MTOPDomainClassification (en)": 92.13, - "MTOPDomainClassification (fr)": 89.38, - "MTOPIntentClassification (en)": 64.68, - "MTOPIntentClassification (fr)": 64.45, - "MasakhaNEWSClassification (fra)": 81.52, - "MassiveIntentClassification (zh-CN)": 64.81, - "MassiveIntentClassification (en)": 70.15, - "MassiveIntentClassification (fr)": 65.42, - "MassiveScenarioClassification (zh-CN)": 71.4, - "MassiveScenarioClassification (en)": 75.33, - "MassiveScenarioClassification (fr)": 71.11, - "MultilingualSentiment": 67.99, - "OnlineShopping": 88.94, - "TNews": 45.77, - "ToxicConversationsClassification": 72.29, - "TweetSentimentExtractionClassification": 61.81, - "Waimai": 82.37 + "Model": "google-gecko-256.text-embedding-preview-0409", + "AmazonCounterfactualClassification": 70.93, + "AmazonPolarityClassification": 97.34, + "AmazonReviewsClassification": 48.47, + "Banking77Classification": 86.01, + "EmotionClassification": 51.53, + "ImdbClassification": 95.7, + "MTOPDomainClassification": 98.02, + "MTOPIntentClassification": 77.82, + "MassiveIntentClassification": 75.67, + "MassiveScenarioClassification": 85.16, + "ToxicConversationsClassification": 88.33, + "TweetSentimentExtractionClassification": 72.97 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-embedding-ada-002", - "AlloProfClusteringP2P": 64.83, - "AlloProfClusteringS2S": 53.52, - "ArxivClusteringP2P": 45.01, - "ArxivClusteringS2S": 36.85, - "BiorxivClusteringP2P": 36.66, - "BiorxivClusteringS2S": 34.21, - "CLSClusteringP2P": 38.26, - "CLSClusteringS2S": 35.91, - "HALClusteringS2S": 26.18, - "MLSUMClusteringP2P": 44.59, - "MLSUMClusteringS2S": 41.67, - "MasakhaNEWSClusteringP2P (fra)": 68.35, - "MasakhaNEWSClusteringS2S (fra)": 48.58, - "MedrxivClusteringP2P": 32.6, - "MedrxivClusteringS2S": 30.8, - "RedditClustering": 61.42, - "RedditClusteringP2P": 64.13, - "StackExchangeClustering": 72.22, - "StackExchangeClusteringP2P": 38.49, - "ThuNewsClusteringP2P": 58.71, - "ThuNewsClusteringS2S": 49.86, - "TwentyNewsgroupsClustering": 52.56 + "Model": "google-gecko-256.text-embedding-preview-0409", + "ArxivClusteringP2P": 44.12, + "ArxivClusteringS2S": 36.54, + "BiorxivClusteringP2P": 36.28, + "BiorxivClusteringS2S": 33.09, + "MedrxivClusteringP2P": 32.08, + "MedrxivClusteringS2S": 30.84, + "RedditClustering": 62.24, + "RedditClusteringP2P": 63.7, + "StackExchangeClustering": 70.19, + "StackExchangeClusteringP2P": 36.1, + "TwentyNewsgroupsClustering": 50.6 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-embedding-ada-002", - "Cmnli": 76.03, - "Ocnli": 63.08, - "OpusparcusPC (fr)": 94.12, - "PawsXPairClassification (fr)": 60.16, - "SprintDuplicateQuestions": 92.17, - "TwitterSemEval2015": 75.28, - "TwitterURLCorpus": 87.22 + "Model": "google-gecko-256.text-embedding-preview-0409", + "SprintDuplicateQuestions": 96.49, + "TwitterSemEval2015": 78.23, + "TwitterURLCorpus": 87.04 }, { - "Model": "text-embedding-ada-002", - "Cmnli": 76.04, - "Ocnli": 63.08, - "OpusparcusPC (fr)": 94.16, - "PawsXPairClassification (fr)": 60.19, - "SprintDuplicateQuestions": 92.17, - "TwitterSemEval2015": 75.28, - "TwitterURLCorpus": 87.22 + "Model": "google-gecko-256.text-embedding-preview-0409", + "SprintDuplicateQuestions": 96.49, + "TwitterSemEval2015": 78.25, + "TwitterURLCorpus": 87.12 } ] }, "Reranking": { "map": [ { - "Model": "text-embedding-ada-002", - "AskUbuntuDupQuestions": 62.05, - "CMedQAv1": 63.08, - "CMedQAv2": 64.02, - "MMarcoReranking": 23.39, - "MindSmallReranking": 31.45, - "SciDocsRR": 81.22, - "StackOverflowDupQuestions": 50.54, - "SyntecReranking": 89.87, - "T2Reranking": 66.65 + "Model": "google-gecko-256.text-embedding-preview-0409", + "AskUbuntuDupQuestions": 63.84, + "MindSmallReranking": 31.89, + "SciDocsRR": 81.62, + "StackOverflowDupQuestions": 53.76 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-ada-002", - "ARCChallenge": 13.3, - "AlloprofRetrieval": 51.64, - "AlphaNLI": 25.65, - "ArguAna": 57.44, - "BSARDRetrieval": 0.61, - "CQADupstackRetrieval": 41.69, - "ClimateFEVER": 21.64, - "CmedqaRetrieval": 22.36, - "CovidRetrieval": 57.21, - "DBPedia": 39.39, - "DuRetrieval": 71.17, - "EcomRetrieval": 44.49, - "FEVER": 74.99, - "FiQA2018": 44.41, - "HellaSwag": 29.29, - "HotpotQA": 60.9, - "MMarcoRetrieval": 69.86, - "MSMARCO": 40.91, - "MedicalRetrieval": 37.92, - "MintakaRetrieval (fr)": 29.94, - "NFCorpus": 36.97, - "NQ": 51.58, - "PIQA": 31.02, - "Quail": 5.83, - "QuoraRetrieval": 87.6, - "RARbCode": 83.39, - "RARbMath": 73.21, - "SCIDOCS": 18.36, - "SIQA": 3.14, - "SciFact": 72.75, - "SpartQA": 4.23, - "SyntecRetrieval": 85.97, - "T2Retrieval": 69.14, - "TRECCOVID": 68.47, - "TempReasonL1": 1.68, - "TempReasonL2Fact": 19.93, - "TempReasonL2Pure": 2.6, - "TempReasonL3Fact": 18.02, - "TempReasonL3Pure": 7.58, - "Touche2020": 21.61, - "VideoRetrieval": 43.85, - "WinoGrande": 19.65, - "XPQARetrieval (fr)": 73.0 + "Model": "google-gecko-256.text-embedding-preview-0409", + "ArguAna": 56.27, + "CQADupstackRetrieval": 45.41, + "ClimateFEVER": 29.35, + "DBPedia": 41.91, + "FEVER": 82.61, + "FiQA2018": 55.54, + "HotpotQA": 64.65, + "MSMARCO": 31.12, + "NFCorpus": 37.81, + "NQ": 57.37, + "QuoraRetrieval": 87.89, + "SCIDOCS": 18.21, + "SciFact": 70.86, + "TRECCOVID": 80.13, + "Touche2020": 27.4 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text-embedding-ada-002", - "AFQMC": 23.88, - "ATEC": 29.25, - "BIOSSES": 86.35, - "BQ": 45.33, - "LCQMC": 68.41, - "PAWSX": 16.55, - "QBQTC": 30.27, - "SICK-R": 80.6, - "SICKFr": 76.28, - "STS12": 69.8, - "STS13": 83.27, - "STS14": 76.09, - "STS15": 86.12, - "STS16": 85.96, - "STS17 (en-en)": 90.25, - "STS22 (zh)": 62.53, - "STS22 (en)": 68.12, - "STS22 (tr)": 64.5, - "STS22 (fr)": 81.09, - "STSB": 70.61, - "STSBenchmark": 83.17, - "STSBenchmarkMultilingualSTS (fr)": 77.55 + "Model": "google-gecko-256.text-embedding-preview-0409", + "BIOSSES": 89.42, + "SICK-R": 81.67, + "STS12": 78.02, + "STS13": 90.1, + "STS14": 85.44, + "STS15": 89.64, + "STS16": 87.24, + "STS17": 90.46, + "STS22": 67.99, + "STSBenchmark": 89.33 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "text-embedding-ada-002", - "SummEval": 30.8, - "SummEvalFr": 30.5 + "Model": "google-gecko-256.text-embedding-preview-0409", + "SummEval": 32.36 } ] }, @@ -10915,135 +10510,192 @@ "p-MRR": [] } }, - "text-search-babbage-001": { + "google-gecko.text-embedding-preview-0409": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "AmazonCounterfactualClassification": 75.34, + "AmazonPolarityClassification": 97.34, + "AmazonReviewsClassification": 51.17, + "Banking77Classification": 88.62, + "EmotionClassification": 52.51, + "ImdbClassification": 95.65, + "MTOPDomainClassification": 98.35, + "MTOPIntentClassification": 83.43, + "MassiveIntentClassification": 80.22, + "MassiveScenarioClassification": 87.19, + "ToxicConversationsClassification": 89.67, + "TweetSentimentExtractionClassification": 74.52 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "ArxivClusteringP2P": 46.27, + "ArxivClusteringS2S": 38.36, + "BiorxivClusteringP2P": 37.87, + "BiorxivClusteringS2S": 35.67, + "MedrxivClusteringP2P": 33.11, + "MedrxivClusteringS2S": 31.54, + "RedditClustering": 65.81, + "RedditClusteringP2P": 66.62, + "StackExchangeClustering": 74.52, + "StackExchangeClusteringP2P": 37.63, + "TwentyNewsgroupsClustering": 54.87 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "SprintDuplicateQuestions": 96.26, + "TwitterSemEval2015": 79.04, + "TwitterURLCorpus": 87.53 + }, + { + "Model": "google-gecko.text-embedding-preview-0409", + "SprintDuplicateQuestions": 96.26, + "TwitterSemEval2015": 79.04, + "TwitterURLCorpus": 87.53 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "AskUbuntuDupQuestions": 64.4, + "MindSmallReranking": 33.07, + "SciDocsRR": 83.59, + "StackOverflowDupQuestions": 54.56 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-search-babbage-001", - "ArguAna": 49.2, - "ClimateFEVER": 19.9, - "FEVER": 77.0, - "FiQA2018": 42.2, - "HotpotQA": 63.1, - "NFCorpus": 36.7, - "QuoraRetrieval": 69.7, - "SciFact": 70.4, - "TRECCOVID": 58.5, - "Touche2020": 29.7 + "Model": "google-gecko.text-embedding-preview-0409", + "ArguAna": 62.18, + "BrightRetrieval (earth_science)": 34.38, + "BrightRetrieval (leetcode)": 29.64, + "BrightRetrieval (theoremqa_questions)": 21.51, + "BrightRetrieval (aops)": 9.33, + "BrightRetrieval (sustainable_living)": 17.25, + "BrightRetrieval (pony)": 3.59, + "BrightRetrieval (theoremqa_theorems)": 14.31, + "BrightRetrieval (stackoverflow)": 17.93, + "BrightRetrieval (biology)": 22.98, + "BrightRetrieval (robotics)": 15.98, + "BrightRetrieval (economics)": 19.5, + "BrightRetrieval (psychology)": 27.86, + "CQADupstackRetrieval": 48.89, + "ClimateFEVER": 33.21, + "DBPedia": 47.12, + "FEVER": 86.96, + "FiQA2018": 59.24, + "HotpotQA": 71.33, + "MSMARCO": 32.58, + "NFCorpus": 40.33, + "NQ": 61.28, + "QuoraRetrieval": 88.18, + "SCIDOCS": 20.34, + "SciFact": 75.42, + "TRECCOVID": 82.62, + "Touche2020": 25.86 + } + ], + "recall_at_1": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "BrightRetrieval (economics)": 21.84, + "BrightRetrieval (stackoverflow)": 19.23, + "BrightRetrieval (pony)": 0.29, + "BrightRetrieval (earth_science)": 38.0, + "BrightRetrieval (sustainable_living)": 25.65, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (psychology)": 30.69, + "BrightRetrieval (biology)": 30.91 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "BIOSSES": 89.46, + "SICK-R": 81.93, + "STS12": 77.59, + "STS13": 90.36, + "STS14": 85.25, + "STS15": 89.66, + "STS16": 87.34, + "STS17": 92.06, + "STS22": 68.02, + "STSBenchmark": 88.99 } ] }, - "STS": { - "cosine_spearman": [] - }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "SummEval": 32.63 + } + ] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "Core17InstructionRetrieval": 5.44, + "News21InstructionRetrieval": 3.94, + "Robust04InstructionRetrieval": -2.4 + } + ] } }, - "universal-sentence-encoder-multilingual-large-3": { + "gottbert-base": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "AmazonReviewsClassification (fr)": 35.09, - "MTOPDomainClassification (fr)": 88.19, - "MTOPIntentClassification (fr)": 63.64, - "MasakhaNEWSClassification (fra)": 72.04, - "MassiveIntentClassification (fr)": 65.8, - "MassiveScenarioClassification (fr)": 73.47 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [ { - "Model": "universal-sentence-encoder-multilingual-large-3", - "AlloProfClusteringP2P": 54.21, - "AlloProfClusteringS2S": 37.95, - "HALClusteringS2S": 18.94, - "MLSUMClusteringP2P": 41.02, - "MLSUMClusteringS2S": 37.97, - "MasakhaNEWSClusteringP2P (fra)": 24.09, - "MasakhaNEWSClusteringS2S (fra)": 40.24 + "Model": "gottbert-base", + "BlurbsClusteringP2P": 34.49, + "BlurbsClusteringS2S": 8.37, + "TenKGnadClusteringP2P": 33.66, + "TenKGnadClusteringS2S": 9.34 } ] }, "PairClassification": { - "max_ap": [ - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "OpusparcusPC (fr)": 93.38, - "PawsXPairClassification (fr)": 53.62 - }, - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "OpusparcusPC (fr)": 93.38, - "PawsXPairClassification (fr)": 53.66 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "AlloprofReranking": 55.39, - "SyntecReranking": 77.13 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "AlloprofRetrieval": 33.78, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 26.21, - "SyntecRetrieval": 63.69, - "XPQARetrieval (fr)": 65.21 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "SICKFr": 74.39, - "STS22 (fr)": 71.11, - "STSBenchmarkMultilingualSTS (fr)": 78.16 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "universal-sentence-encoder-multilingual-large-3", - "SummEvalFr": 28.56 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -11052,126 +10704,175 @@ "p-MRR": [] } }, - "glove.6B.300d": { + "gte-Qwen1.5-7B-instruct": { "BitextMining": { - "f1": [ - { - "Model": "glove.6B.300d", - "BUCC (de-en)": 0.18, - "BUCC (fr-en)": 0.19, - "BUCC (ru-en)": 0.1, - "BUCC (zh-en)": 0.0 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "glove.6B.300d", - "AmazonCounterfactualClassification (en)": 56.91, - "AmazonPolarityClassification": 60.32, - "AmazonReviewsClassification (en)": 29.67, - "Banking77Classification": 67.69, - "EmotionClassification": 36.93, - "ImdbClassification": 62.57, - "MTOPDomainClassification (en)": 79.11, - "MTOPIntentClassification (en)": 55.85, - "MassiveIntentClassification (en)": 56.19, - "MassiveScenarioClassification (en)": 66.03, - "ToxicConversationsClassification": 65.4, - "TweetSentimentExtractionClassification": 50.8 + "Model": "gte-Qwen1.5-7B-instruct", + "AmazonCounterfactualClassification": 83.16, + "AmazonPolarityClassification": 96.7, + "AmazonReviewsClassification": 52.95, + "Banking77Classification": 81.68, + "EmotionClassification": 54.53, + "IFlyTek": 53.77, + "ImdbClassification": 95.58, + "JDReview": 88.2, + "MTOPDomainClassification": 95.75, + "MTOPIntentClassification": 84.26, + "MassiveIntentClassification": 78.47, + "MassiveScenarioClassification": 77.26, + "MultilingualSentiment": 77.42, + "OnlineShopping": 94.48, + "TNews": 51.24, + "ToxicConversationsClassification": 78.75, + "TweetSentimentExtractionClassification": 66.0, + "Waimai": 88.63 } ] }, "Clustering": { "v_measure": [ { - "Model": "glove.6B.300d", - "ArxivClusteringP2P": 32.56, - "ArxivClusteringS2S": 23.14, - "BiorxivClusteringP2P": 29.27, - "BiorxivClusteringS2S": 19.18, - "MedrxivClusteringP2P": 26.12, - "MedrxivClusteringS2S": 20.38, - "RedditClustering": 28.46, - "RedditClusteringP2P": 35.82, - "StackExchangeClustering": 35.8, - "StackExchangeClusteringP2P": 28.51, - "TwentyNewsgroupsClustering": 25.83 + "Model": "gte-Qwen1.5-7B-instruct", + "ArxivClusteringP2P": 56.4, + "ArxivClusteringS2S": 51.45, + "BiorxivClusteringP2P": 49.01, + "BiorxivClusteringS2S": 45.06, + "CLSClusteringP2P": 47.21, + "CLSClusteringS2S": 45.79, + "MedrxivClusteringP2P": 44.37, + "MedrxivClusteringS2S": 42.0, + "RedditClustering": 73.37, + "RedditClusteringP2P": 72.51, + "StackExchangeClustering": 79.07, + "StackExchangeClusteringP2P": 49.57, + "ThuNewsClusteringP2P": 87.43, + "ThuNewsClusteringS2S": 87.9, + "TwentyNewsgroupsClustering": 51.31 } ] }, "PairClassification": { "max_ap": [ { - "Model": "glove.6B.300d", - "SprintDuplicateQuestions": 86.96, - "TwitterSemEval2015": 48.45, - "TwitterURLCorpus": 77.35 + "Model": "gte-Qwen1.5-7B-instruct", + "Cmnli": 91.81, + "Ocnli": 85.22, + "SprintDuplicateQuestions": 95.99, + "TwitterSemEval2015": 79.36, + "TwitterURLCorpus": 86.79 }, { - "Model": "glove.6B.300d", - "SprintDuplicateQuestions": 86.96, - "TwitterSemEval2015": 53.12, - "TwitterURLCorpus": 77.35 + "Model": "gte-Qwen1.5-7B-instruct", + "Cmnli": 91.85, + "Ocnli": 85.28, + "SprintDuplicateQuestions": 96.07, + "TwitterSemEval2015": 79.36, + "TwitterURLCorpus": 86.79 } ] }, "Reranking": { "map": [ { - "Model": "glove.6B.300d", - "AskUbuntuDupQuestions": 49.57, - "MindSmallReranking": 27.01, - "SciDocsRR": 62.56, - "StackOverflowDupQuestions": 34.03 + "Model": "gte-Qwen1.5-7B-instruct", + "AskUbuntuDupQuestions": 66.0, + "CMedQAv1": 86.37, + "CMedQAv2": 87.41, + "MindSmallReranking": 32.71, + "SciDocsRR": 87.89, + "StackOverflowDupQuestions": 53.93, + "T2Reranking": 68.11 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "glove.6B.300d", - "ArguAna": 36.3, - "CQADupstackRetrieval": 15.47, - "ClimateFEVER": 14.44, - "DBPedia": 18.28, - "FEVER": 14.99, - "FiQA2018": 10.09, - "HotpotQA": 19.18, - "MSMARCO": 9.6, - "NFCorpus": 13.87, - "NQ": 12.87, - "QuoraRetrieval": 71.32, - "SCIDOCS": 8.04, - "SciFact": 29.58, - "TRECCOVID": 36.22, - "Touche2020": 13.99 + "Model": "gte-Qwen1.5-7B-instruct", + "ArguAna": 62.65, + "BrightRetrieval (stackoverflow)": 19.85, + "BrightRetrieval (earth_science)": 36.22, + "BrightRetrieval (leetcode)": 25.46, + "BrightRetrieval (theoremqa_questions)": 26.97, + "BrightRetrieval (economics)": 17.72, + "BrightRetrieval (robotics)": 13.47, + "BrightRetrieval (pony)": 9.79, + "BrightRetrieval (aops)": 14.36, + "BrightRetrieval (psychology)": 24.61, + "BrightRetrieval (theoremqa_theorems)": 30.8, + "BrightRetrieval (biology)": 30.92, + "BrightRetrieval (sustainable_living)": 14.93, + "CQADupstackRetrieval": 40.64, + "ClimateFEVER": 44.0, + "CmedqaRetrieval": 43.47, + "CovidRetrieval": 80.87, + "DBPedia": 48.04, + "DuRetrieval": 86.01, + "EcomRetrieval": 66.46, + "FEVER": 93.35, + "FiQA2018": 55.31, + "HotpotQA": 72.25, + "MMarcoRetrieval": 73.83, + "MSMARCO": 41.68, + "MedicalRetrieval": 61.33, + "NFCorpus": 38.25, + "NQ": 61.79, + "QuoraRetrieval": 89.61, + "SCIDOCS": 27.69, + "SciFact": 75.31, + "T2Retrieval": 83.58, + "TRECCOVID": 72.72, + "Touche2020": 20.3, + "VideoRetrieval": 69.41 + } + ], + "recall_at_1": [ + { + "Model": "gte-Qwen1.5-7B-instruct", + "BrightRetrieval (economics)": 25.73, + "BrightRetrieval (pony)": 1.32, + "BrightRetrieval (robotics)": 21.29, + "BrightRetrieval (biology)": 39.24, + "BrightRetrieval (earth_science)": 36.13, + "BrightRetrieval (stackoverflow)": 23.5, + "BrightRetrieval (psychology)": 42.28, + "BrightRetrieval (sustainable_living)": 33.1 } ] }, "STS": { "cosine_spearman": [ { - "Model": "glove.6B.300d", - "BIOSSES": 44.93, - "SICK-R": 55.43, - "STS12": 54.64, - "STS13": 69.16, - "STS14": 60.81, - "STS15": 72.31, - "STS16": 65.34, - "STS17 (en-en)": 77.95, - "STS22 (en)": 56.35, - "STSBenchmark": 61.54 + "Model": "gte-Qwen1.5-7B-instruct", + "AFQMC": 58.47, + "ATEC": 55.46, + "BIOSSES": 81.12, + "BQ": 77.59, + "LCQMC": 76.29, + "PAWSX": 50.22, + "QBQTC": 31.82, + "SICK-R": 79.15, + "STS12": 76.52, + "STS13": 88.63, + "STS14": 83.32, + "STS15": 87.5, + "STS16": 86.39, + "STS17": 87.79, + "STS22": 67.36, + "STSB": 81.37, + "STSBenchmark": 87.35 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "glove.6B.300d", - "SummEval": 28.87 + "Model": "gte-Qwen1.5-7B-instruct", + "SummEval": 31.46 } ] }, @@ -11182,7 +10883,7 @@ "p-MRR": [] } }, - "nomic-embed-text-v1": { + "gte-Qwen2-7B-instruct": { "BitextMining": { "f1": [] }, @@ -11201,16 +10902,32 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "nomic-embed-text-v1", - "LEMBNarrativeQARetrieval": 41.23, - "LEMBQMSumRetrieval": 36.65, - "LEMBSummScreenFDRetrieval": 92.97, - "LEMBWikimQARetrieval": 73.75 - }, + "Model": "gte-Qwen2-7B-instruct", + "BrightRetrieval (earth_science)": 40.66, + "BrightRetrieval (sustainable_living)": 20.82, + "BrightRetrieval (theoremqa_theorems)": 34.22, + "BrightRetrieval (aops)": 15.1, + "BrightRetrieval (economics)": 16.18, + "BrightRetrieval (pony)": 1.25, + "BrightRetrieval (stackoverflow)": 13.95, + "BrightRetrieval (leetcode)": 31.07, + "BrightRetrieval (biology)": 32.09, + "BrightRetrieval (theoremqa_questions)": 29.9, + "BrightRetrieval (robotics)": 12.82, + "BrightRetrieval (psychology)": 26.58 + } + ], + "recall_at_1": [ { - "Model": "nomic-embed-text-v1", - "LEMBNeedleRetrieval": 39.5, - "LEMBPasskeyRetrieval": 44.75 + "Model": "gte-Qwen2-7B-instruct", + "BrightRetrieval (psychology)": 46.73, + "BrightRetrieval (biology)": 34.87, + "BrightRetrieval (sustainable_living)": 31.28, + "BrightRetrieval (robotics)": 10.89, + "BrightRetrieval (pony)": 1.17, + "BrightRetrieval (earth_science)": 38.36, + "BrightRetrieval (stackoverflow)": 16.67, + "BrightRetrieval (economics)": 27.67 } ] }, @@ -11227,118 +10944,118 @@ "p-MRR": [] } }, - "LLM2Vec-Llama-2-unsupervised": { + "gtr-t5-base": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "AmazonCounterfactualClassification (en)": 76.91, - "AmazonPolarityClassification": 79.05, - "AmazonReviewsClassification (en)": 40.08, - "Banking77Classification": 84.65, - "EmotionClassification": 46.58, - "ImdbClassification": 75.68, - "MTOPDomainClassification (en)": 94.33, - "MTOPIntentClassification (en)": 79.54, - "MassiveIntentClassification (en)": 73.84, - "MassiveScenarioClassification (en)": 79.17, - "ToxicConversationsClassification": 71.81, - "TweetSentimentExtractionClassification": 57.17 + "Model": "gtr-t5-base", + "AmazonCounterfactualClassification": 69.33, + "AmazonPolarityClassification": 67.82, + "AmazonReviewsClassification": 38.48, + "Banking77Classification": 79.26, + "EmotionClassification": 42.2, + "ImdbClassification": 65.99, + "MTOPDomainClassification": 92.42, + "MTOPIntentClassification": 62.44, + "MassiveIntentClassification": 67.05, + "MassiveScenarioClassification": 75.4, + "ToxicConversationsClassification": 66.6, + "TweetSentimentExtractionClassification": 56.02 } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "ArxivClusteringP2P": 47.81, - "ArxivClusteringS2S": 40.53, - "BiorxivClusteringP2P": 38.12, - "BiorxivClusteringS2S": 31.25, - "MedrxivClusteringP2P": 30.94, - "MedrxivClusteringS2S": 28.04, - "RedditClustering": 42.84, - "RedditClusteringP2P": 60.1, - "StackExchangeClustering": 65.12, - "StackExchangeClusteringP2P": 33.61, - "TwentyNewsgroupsClustering": 30.76 + "Model": "gtr-t5-base", + "ArxivClusteringP2P": 35.49, + "ArxivClusteringS2S": 27.18, + "BiorxivClusteringP2P": 27.66, + "BiorxivClusteringS2S": 23.25, + "MedrxivClusteringP2P": 27.57, + "MedrxivClusteringS2S": 25.13, + "RedditClustering": 56.13, + "RedditClusteringP2P": 58.53, + "StackExchangeClustering": 64.21, + "StackExchangeClusteringP2P": 33.01, + "TwentyNewsgroupsClustering": 46.72 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "SprintDuplicateQuestions": 87.57, - "TwitterSemEval2015": 65.14, - "TwitterURLCorpus": 80.94 + "Model": "gtr-t5-base", + "SprintDuplicateQuestions": 94.55, + "TwitterSemEval2015": 72.23, + "TwitterURLCorpus": 84.77 }, { - "Model": "LLM2Vec-Llama-2-unsupervised", - "SprintDuplicateQuestions": 87.57, - "TwitterSemEval2015": 65.14, - "TwitterURLCorpus": 80.94 + "Model": "gtr-t5-base", + "SprintDuplicateQuestions": 94.55, + "TwitterSemEval2015": 72.23, + "TwitterURLCorpus": 84.77 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "AskUbuntuDupQuestions": 55.56, - "MindSmallReranking": 30.86, - "SciDocsRR": 77.62, - "StackOverflowDupQuestions": 47.77 + "Model": "gtr-t5-base", + "AskUbuntuDupQuestions": 60.86, + "MindSmallReranking": 31.33, + "SciDocsRR": 73.71, + "StackOverflowDupQuestions": 51.01 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "ArguAna": 47.09, - "CQADupstackRetrieval": 30.78, - "ClimateFEVER": 20.67, - "DBPedia": 25.81, - "FEVER": 43.48, - "FiQA2018": 24.62, - "HotpotQA": 48.46, - "MSMARCO": 18.81, - "NFCorpus": 26.81, - "NQ": 33.21, - "QuoraRetrieval": 86.15, - "SCIDOCS": 10.0, - "SciFact": 64.48, - "TRECCOVID": 60.67, - "Touche2020": 10.18 + "Model": "gtr-t5-base", + "ArguAna": 50.83, + "CQADupstackRetrieval": 34.55, + "ClimateFEVER": 24.88, + "DBPedia": 35.24, + "FEVER": 68.93, + "FiQA2018": 35.15, + "HotpotQA": 54.93, + "MSMARCO": 41.16, + "NFCorpus": 30.22, + "NQ": 50.47, + "QuoraRetrieval": 87.98, + "SCIDOCS": 14.0, + "SciFact": 59.74, + "TRECCOVID": 56.05, + "Touche2020": 25.89 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "BIOSSES": 82.41, - "SICK-R": 71.77, - "STS12": 65.39, - "STS13": 79.26, - "STS14": 72.98, - "STS15": 82.72, - "STS16": 81.02, - "STS17 (en-en)": 86.7, - "STS22 (en)": 63.47, - "STSBenchmark": 78.32 + "Model": "gtr-t5-base", + "BIOSSES": 79.0, + "SICK-R": 71.45, + "STS12": 68.59, + "STS13": 79.09, + "STS14": 74.64, + "STS15": 84.85, + "STS16": 81.57, + "STS17": 85.8, + "STS22": 66.17, + "STSBenchmark": 79.58 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "LLM2Vec-Llama-2-unsupervised", - "SummEval": 31.38 + "Model": "gtr-t5-base", + "SummEval": 29.67 } ] }, @@ -11349,913 +11066,246 @@ "p-MRR": [] } }, - "multilingual-e5-base": { + "gtr-t5-large": { "BitextMining": { - "f1": [ - { - "Model": "multilingual-e5-base", - "BornholmBitextMining (dan-Latn)": 33.22, - "BornholmBitextMining": 46.4, - "Tatoeba (csb-Latn_eng-Latn)": 24.29, - "Tatoeba (spa-Latn_eng-Latn)": 96.97, - "Tatoeba (kzj-Latn_eng-Latn)": 6.26, - "Tatoeba (uzb-Latn_eng-Latn)": 62.63, - "Tatoeba (mal-Mlym_eng-Latn)": 96.72, - "Tatoeba (dtp-Latn_eng-Latn)": 5.13, - "Tatoeba (ces-Latn_eng-Latn)": 88.75, - "Tatoeba (mon-Cyrl_eng-Latn)": 78.37, - "Tatoeba (fry-Latn_eng-Latn)": 50.82, - "Tatoeba (cbk-Latn_eng-Latn)": 60.66, - "Tatoeba (awa-Deva_eng-Latn)": 68.39, - "Tatoeba (cha-Latn_eng-Latn)": 16.95, - "Tatoeba (nld-Latn_eng-Latn)": 93.2, - "Tatoeba (mhr-Cyrl_eng-Latn)": 5.52, - "Tatoeba (cmn-Hans_eng-Latn)": 93.35, - "Tatoeba (arq-Arab_eng-Latn)": 26.61, - "Tatoeba (kor-Hang_eng-Latn)": 83.37, - "Tatoeba (pol-Latn_eng-Latn)": 94.57, - "Tatoeba (uig-Arab_eng-Latn)": 62.97, - "Tatoeba (zsm-Latn_eng-Latn)": 92.45, - "Tatoeba (tuk-Latn_eng-Latn)": 19.67, - "Tatoeba (ind-Latn_eng-Latn)": 90.26, - "Tatoeba (aze-Latn_eng-Latn)": 84.71, - "Tatoeba (xho-Latn_eng-Latn)": 73.24, - "Tatoeba (war-Latn_eng-Latn)": 47.18, - "Tatoeba (ast-Latn_eng-Latn)": 74.36, - "Tatoeba (ido-Latn_eng-Latn)": 74.41, - "Tatoeba (dsb-Latn_eng-Latn)": 34.36, - "Tatoeba (est-Latn_eng-Latn)": 70.64, - "Tatoeba (ina-Latn_eng-Latn)": 86.11, - "Tatoeba (bel-Cyrl_eng-Latn)": 86.7, - "Tatoeba (fin-Latn_eng-Latn)": 86.15, - "Tatoeba (ang-Latn_eng-Latn)": 29.87, - "Tatoeba (gle-Latn_eng-Latn)": 58.62, - "Tatoeba (slk-Latn_eng-Latn)": 86.42, - "Tatoeba (hye-Armn_eng-Latn)": 85.85, - "Tatoeba (tgl-Latn_eng-Latn)": 83.78, - "Tatoeba (pam-Latn_eng-Latn)": 6.92, - "Tatoeba (eus-Latn_eng-Latn)": 56.26, - "Tatoeba (slv-Latn_eng-Latn)": 81.93, - "Tatoeba (nno-Latn_eng-Latn)": 82.67, - "Tatoeba (wuu-Hans_eng-Latn)": 78.65, - "Tatoeba (nds-Latn_eng-Latn)": 53.86, - "Tatoeba (deu-Latn_eng-Latn)": 97.07, - "Tatoeba (ita-Latn_eng-Latn)": 90.61, - "Tatoeba (rus-Cyrl_eng-Latn)": 91.78, - "Tatoeba (bos-Latn_eng-Latn)": 88.86, - "Tatoeba (glg-Latn_eng-Latn)": 82.69, - "Tatoeba (kaz-Cyrl_eng-Latn)": 75.56, - "Tatoeba (nov-Latn_eng-Latn)": 66.96, - "Tatoeba (tam-Taml_eng-Latn)": 85.12, - "Tatoeba (dan-Latn_eng-Latn)": 91.23, - "Tatoeba (oci-Latn_eng-Latn)": 35.79, - "Tatoeba (arz-Arab_eng-Latn)": 66.79, - "Tatoeba (gsw-Latn_eng-Latn)": 43.53, - "Tatoeba (jav-Latn_eng-Latn)": 61.25, - "Tatoeba (sqi-Latn_eng-Latn)": 90.06, - "Tatoeba (vie-Latn_eng-Latn)": 94.55, - "Tatoeba (lit-Latn_eng-Latn)": 75.53, - "Tatoeba (isl-Latn_eng-Latn)": 76.9, - "Tatoeba (hsb-Latn_eng-Latn)": 40.36, - "Tatoeba (hun-Latn_eng-Latn)": 84.41, - "Tatoeba (kab-Latn_eng-Latn)": 21.77, - "Tatoeba (ceb-Latn_eng-Latn)": 45.46, - "Tatoeba (ber-Tfng_eng-Latn)": 23.59, - "Tatoeba (jpn-Jpan_eng-Latn)": 90.3, - "Tatoeba (max-Deva_eng-Latn)": 52.4, - "Tatoeba (ara-Arab_eng-Latn)": 82.86, - "Tatoeba (nob-Latn_eng-Latn)": 95.9, - "Tatoeba (cat-Latn_eng-Latn)": 84.09, - "Tatoeba (orv-Cyrl_eng-Latn)": 16.0, - "Tatoeba (cor-Latn_eng-Latn)": 4.38, - "Tatoeba (tel-Telu_eng-Latn)": 88.49, - "Tatoeba (ell-Grek_eng-Latn)": 89.96, - "Tatoeba (bre-Latn_eng-Latn)": 5.44, - "Tatoeba (swg-Latn_eng-Latn)": 42.33, - "Tatoeba (pes-Arab_eng-Latn)": 87.18, - "Tatoeba (cym-Latn_eng-Latn)": 65.69, - "Tatoeba (heb-Hebr_eng-Latn)": 74.26, - "Tatoeba (urd-Arab_eng-Latn)": 86.2, - "Tatoeba (amh-Ethi_eng-Latn)": 74.93, - "Tatoeba (lvs-Latn_eng-Latn)": 76.76, - "Tatoeba (swe-Latn_eng-Latn)": 91.33, - "Tatoeba (ukr-Cyrl_eng-Latn)": 88.29, - "Tatoeba (gla-Latn_eng-Latn)": 43.08, - "Tatoeba (mar-Deva_eng-Latn)": 86.62, - "Tatoeba (khm-Khmr_eng-Latn)": 47.27, - "Tatoeba (tat-Cyrl_eng-Latn)": 66.92, - "Tatoeba (lat-Latn_eng-Latn)": 39.62, - "Tatoeba (pms-Latn_eng-Latn)": 44.61, - "Tatoeba (hrv-Latn_eng-Latn)": 92.5, - "Tatoeba (mkd-Cyrl_eng-Latn)": 73.76, - "Tatoeba (bul-Cyrl_eng-Latn)": 88.95, - "Tatoeba (kat-Geor_eng-Latn)": 77.83, - "Tatoeba (hin-Deva_eng-Latn)": 93.13, - "Tatoeba (fao-Latn_eng-Latn)": 64.72, - "Tatoeba (ben-Beng_eng-Latn)": 81.05, - "Tatoeba (epo-Latn_eng-Latn)": 92.07, - "Tatoeba (por-Latn_eng-Latn)": 92.74, - "Tatoeba (tur-Latn_eng-Latn)": 92.54, - "Tatoeba (yue-Hant_eng-Latn)": 80.66, - "Tatoeba (srp-Cyrl_eng-Latn)": 89.08, - "Tatoeba (swh-Latn_eng-Latn)": 66.81, - "Tatoeba (tha-Thai_eng-Latn)": 94.22, - "Tatoeba (ron-Latn_eng-Latn)": 91.27, - "Tatoeba (fra-Latn_eng-Latn)": 92.76, - "Tatoeba (yid-Hebr_eng-Latn)": 63.2, - "Tatoeba (afr-Latn_eng-Latn)": 87.04, - "Tatoeba (tzl-Latn_eng-Latn)": 34.44, - "Tatoeba (lfn-Latn_eng-Latn)": 52.85, - "Tatoeba (ile-Latn_eng-Latn)": 72.56, - "Tatoeba (kur-Latn_eng-Latn)": 52.96 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "multilingual-e5-base", - "AllegroReviews (pol-Latn)": 40.78, - "AllegroReviews": 40.85, - "AmazonCounterfactualClassification (en-ext)": 76.91, - "AmazonCounterfactualClassification (en)": 77.36, - "AmazonCounterfactualClassification (deu-Latn)": 70.81, - "AmazonCounterfactualClassification (jpn-Jpan)": 72.02, - "AmazonPolarityClassification": 91.76, - "AmazonReviewsClassification (en)": 47.54, - "AmazonReviewsClassification (deu-Latn)": 44.37, - "AmazonReviewsClassification (spa-Latn)": 43.38, - "AmazonReviewsClassification (fra-Latn)": 41.55, - "AmazonReviewsClassification (jpn-Jpan)": 39.57, - "AmazonReviewsClassification (cmn-Hans)": 38.34, - "AmazonReviewsClassification (fr)": 40.94, - "AngryTweetsClassification (dan-Latn)": 56.28, - "AngryTweetsClassification": 54.65, - "Banking77Classification": 73.53, - "CBD (pol-Latn)": 62.6, - "CBD": 62.66, - "DKHateClassification": 63.53, - "DanishPoliticalCommentsClassification (dan-Latn)": 36.41, - "DanishPoliticalCommentsClassification": 36.69, - "EmotionClassification": 45.68, - "GeoreviewClassification (rus-Cyrl)": 46.05, - "HeadlineClassification (rus-Cyrl)": 75.64, - "IFlyTek (cmn-Hans)": 40.81, - "IFlyTek": 44.93, - "ImdbClassification": 84.29, - "InappropriatenessClassification (rus-Cyrl)": 58.78, - "JDReview (cmn-Hans)": 75.72, - "JDReview": 76.21, - "KinopoiskClassification (rus-Cyrl)": 50.89, - "LccSentimentClassification (dan-Latn)": 60.13, - "LccSentimentClassification": 59.67, - "MTOPDomainClassification (en)": 90.9, - "MTOPDomainClassification (deu-Latn)": 87.94, - "MTOPDomainClassification (spa-Latn)": 85.96, - "MTOPDomainClassification (fra-Latn)": 82.88, - "MTOPDomainClassification (hin-Deva)": 83.92, - "MTOPDomainClassification (tha-Thai)": 83.94, - "MTOPDomainClassification (fr)": 84.79, - "MTOPIntentClassification (en)": 61.6, - "MTOPIntentClassification (deu-Latn)": 61.05, - "MTOPIntentClassification (spa-Latn)": 55.36, - "MTOPIntentClassification (fra-Latn)": 52.23, - "MTOPIntentClassification (hin-Deva)": 53.93, - "MTOPIntentClassification (tha-Thai)": 58.69, - "MTOPIntentClassification (fr)": 55.51, - "MasakhaNEWSClassification (amh-Ethi)": 83.8, - "MasakhaNEWSClassification (eng)": 76.49, - "MasakhaNEWSClassification (fra-Latn)": 76.35, - "MasakhaNEWSClassification (hau-Latn)": 74.63, - "MasakhaNEWSClassification (ibo-Latn)": 64.59, - "MasakhaNEWSClassification (lin-Latn)": 70.57, - "MasakhaNEWSClassification (lug-Latn)": 68.12, - "MasakhaNEWSClassification (orm-Ethi)": 71.75, - "MasakhaNEWSClassification (pcm-Latn)": 91.05, - "MasakhaNEWSClassification (run-Latn)": 73.35, - "MasakhaNEWSClassification (sna-Latn)": 84.17, - "MasakhaNEWSClassification (som-Latn)": 60.1, - "MasakhaNEWSClassification (swa-Latn)": 70.74, - "MasakhaNEWSClassification (tir-Ethi)": 67.1, - "MasakhaNEWSClassification (xho-Latn)": 76.03, - "MasakhaNEWSClassification (yor-Latn)": 72.75, - "MasakhaNEWSClassification (fra)": 79.69, - "MassiveIntentClassification (spa-Latn)": 61.13, - "MassiveIntentClassification (deu-Latn)": 59.82, - "MassiveIntentClassification (mon-Cyrl)": 46.8, - "MassiveIntentClassification (sqi-Latn)": 51.07, - "MassiveIntentClassification (swe-Latn)": 62.43, - "MassiveIntentClassification (tur-Latn)": 60.69, - "MassiveIntentClassification (hye-Armn)": 48.77, - "MassiveIntentClassification (aze-Latn)": 51.36, - "MassiveIntentClassification (dan-Latn)": 60.69, - "MassiveIntentClassification (mya-Mymr)": 46.67, - "MassiveIntentClassification (tha-Thai)": 59.63, - "MassiveIntentClassification (slv-Latn)": 53.84, - "MassiveIntentClassification (swa-Latn)": 45.24, - "MassiveIntentClassification (pol-Latn)": 60.98, - "MassiveIntentClassification (ben-Beng)": 51.69, - "MassiveIntentClassification (nob-Latn)": 60.06, - "MassiveIntentClassification (fin-Latn)": 58.91, - "MassiveIntentClassification (jav-Latn)": 43.23, - "MassiveIntentClassification (ind-Latn)": 58.7, - "MassiveIntentClassification (kat-Geor)": 37.56, - "MassiveIntentClassification (afr-Latn)": 49.82, - "MassiveIntentClassification (khm-Khmr)": 32.14, - "MassiveIntentClassification (lav-Latn)": 51.17, - "MassiveIntentClassification (tgl-Latn)": 48.99, - "MassiveIntentClassification (kor-Kore)": 59.97, - "MassiveIntentClassification (amh-Ethi)": 42.4, - "MassiveIntentClassification (ara-Arab)": 50.2, - "MassiveIntentClassification (ell-Grek)": 58.07, - "MassiveIntentClassification (fas-Arab)": 59.51, - "MassiveIntentClassification (hin-Deva)": 56.75, - "MassiveIntentClassification (fra-Latn)": 61.32, - "MassiveIntentClassification (isl-Latn)": 44.52, - "MassiveIntentClassification (tel-Telu)": 50.09, - "MassiveIntentClassification (jpn-Jpan)": 62.3, - "MassiveIntentClassification (vie-Latn)": 59.61, - "MassiveIntentClassification (hun-Latn)": 57.69, - "MassiveIntentClassification (en)": 65.71, - "MassiveIntentClassification (msa-Latn)": 52.85, - "MassiveIntentClassification (cmo-Hans)": 63.22, - "MassiveIntentClassification (ron-Latn)": 56.83, - "MassiveIntentClassification (heb-Hebr)": 55.3, - "MassiveIntentClassification (urd-Arab)": 51.3, - "MassiveIntentClassification (por-Latn)": 62.12, - "MassiveIntentClassification (ita-Latn)": 61.29, - "MassiveIntentClassification (tam-Taml)": 48.93, - "MassiveIntentClassification (cym-Latn)": 37.05, - "MassiveIntentClassification (kan-Knda)": 48.63, - "MassiveIntentClassification (nld-Latn)": 61.23, - "MassiveIntentClassification (mal-Mlym)": 53.75, - "MassiveIntentClassification (cmo-Hant)": 56.4, - "MassiveIntentClassification (rus-Cyrl)": 62.78, - "MassiveIntentClassification (da)": 60.16, - "MassiveIntentClassification (nb)": 59.83, - "MassiveIntentClassification (sv)": 61.78, - "MassiveIntentClassification (pl)": 61.04, - "MassiveScenarioClassification (aze-Latn)": 55.15, - "MassiveScenarioClassification (fra-Latn)": 67.37, - "MassiveScenarioClassification (kor-Kore)": 67.9, - "MassiveScenarioClassification (dan-Latn)": 67.97, - "MassiveScenarioClassification (fin-Latn)": 64.94, - "MassiveScenarioClassification (ron-Latn)": 63.5, - "MassiveScenarioClassification (cmo-Hant)": 63.73, - "MassiveScenarioClassification (ara-Arab)": 58.0, - "MassiveScenarioClassification (kan-Knda)": 53.49, - "MassiveScenarioClassification (nld-Latn)": 68.62, - "MassiveScenarioClassification (sqi-Latn)": 57.92, - "MassiveScenarioClassification (tam-Taml)": 53.86, - "MassiveScenarioClassification (amh-Ethi)": 50.33, - "MassiveScenarioClassification (mal-Mlym)": 59.89, - "MassiveScenarioClassification (hun-Latn)": 65.75, - "MassiveScenarioClassification (vie-Latn)": 66.35, - "MassiveScenarioClassification (tgl-Latn)": 54.36, - "MassiveScenarioClassification (kat-Geor)": 43.38, - "MassiveScenarioClassification (slv-Latn)": 58.3, - "MassiveScenarioClassification (cym-Latn)": 43.84, - "MassiveScenarioClassification (ita-Latn)": 66.17, - "MassiveScenarioClassification (isl-Latn)": 53.28, - "MassiveScenarioClassification (fas-Arab)": 63.92, - "MassiveScenarioClassification (por-Latn)": 65.49, - "MassiveScenarioClassification (mon-Cyrl)": 51.87, - "MassiveScenarioClassification (lav-Latn)": 56.42, - "MassiveScenarioClassification (rus-Cyrl)": 68.21, - "MassiveScenarioClassification (ben-Beng)": 57.0, - "MassiveScenarioClassification (en)": 71.57, - "MassiveScenarioClassification (hye-Armn)": 53.63, - "MassiveScenarioClassification (tur-Latn)": 65.18, - "MassiveScenarioClassification (jpn-Jpan)": 69.89, - "MassiveScenarioClassification (nob-Latn)": 66.57, - "MassiveScenarioClassification (swe-Latn)": 69.35, - "MassiveScenarioClassification (ind-Latn)": 63.6, - "MassiveScenarioClassification (tha-Thai)": 67.37, - "MassiveScenarioClassification (deu-Latn)": 68.4, - "MassiveScenarioClassification (jav-Latn)": 51.94, - "MassiveScenarioClassification (swa-Latn)": 52.64, - "MassiveScenarioClassification (msa-Latn)": 58.35, - "MassiveScenarioClassification (spa-Latn)": 66.47, - "MassiveScenarioClassification (tel-Telu)": 54.24, - "MassiveScenarioClassification (heb-Hebr)": 62.53, - "MassiveScenarioClassification (mya-Mymr)": 50.77, - "MassiveScenarioClassification (afr-Latn)": 58.95, - "MassiveScenarioClassification (ell-Grek)": 65.38, - "MassiveScenarioClassification (khm-Khmr)": 38.45, - "MassiveScenarioClassification (pol-Latn)": 66.12, - "MassiveScenarioClassification (urd-Arab)": 56.74, - "MassiveScenarioClassification (hin-Deva)": 62.91, - "MassiveScenarioClassification (cmo-Hans)": 70.24, - "MassiveScenarioClassification (da)": 67.46, - "MassiveScenarioClassification (nb)": 66.18, - "MassiveScenarioClassification (sv)": 69.15, - "MassiveScenarioClassification (pl)": 66.11, - "MultilingualSentiment (cmn-Hans)": 67.56, - "MultilingualSentiment": 65.28, - "NoRecClassification (nob-Latn)": 53.74, - "NoRecClassification": 57.58, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 75.85, - "NordicLangClassification": 75.94, - "NorwegianParliament": 59.94, - "OnlineShopping (cmn-Hans)": 88.66, - "OnlineShopping": 88.4, - "PAC (pol-Latn)": 70.87, - "PAC": 70.87, - "PolEmo2.0-IN (pol-Latn)": 67.59, - "PolEmo2.0-IN": 67.66, - "PolEmo2.0-OUT (pol-Latn)": 43.93, - "PolEmo2.0-OUT": 43.91, - "RuReviewsClassification (rus-Cyrl)": 62.99, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.28, - "RuSciBenchOECDClassification (rus-Cyrl)": 42.69, - "ScalaDaClassification": 50.79, - "ScalaNbClassification": 50.32, - "TNews (cmn-Hans)": 47.52, - "TNews": 47.06, - "ToxicConversationsClassification": 64.33, - "TweetSentimentExtractionClassification": 62.8, - "Waimai (cmn-Hans)": 85.98, - "Waimai": 84.42 + "Model": "gtr-t5-large", + "AmazonCounterfactualClassification": 45.87, + "AmazonPolarityClassification": 73.92, + "AmazonReviewsClassification": 21.83, + "Banking77Classification": 81.21, + "EmotionClassification": 46.33, + "ImdbClassification": 70.86, + "MTOPDomainClassification": 16.36, + "MTOPIntentClassification": 5.38, + "MassiveIntentClassification": 4.64, + "MassiveScenarioClassification": 8.16, + "ToxicConversationsClassification": 68.65, + "TweetSentimentExtractionClassification": 54.09 } ] }, "Clustering": { "v_measure": [ { - "Model": "multilingual-e5-base", - "8TagsClustering": 24.97, - "AlloProfClusteringP2P": 62.09, - "AlloProfClusteringS2S": 32.98, - "ArxivClusteringP2P": 43.35, - "ArxivClusteringS2S": 36.0, - "BiorxivClusteringP2P": 37.55, - "BiorxivClusteringS2S": 30.33, - "CLSClusteringP2P": 32.41, - "CLSClusteringS2S": 36.99, - "GeoreviewClusteringP2P (rus-Cyrl)": 54.46, - "HALClusteringS2S": 22.48, - "MLSUMClusteringP2P (rus-Cyrl)": 43.47, - "MLSUMClusteringP2P": 43.48, - "MLSUMClusteringS2S (rus-Cyrl)": 40.87, - "MLSUMClusteringS2S": 38.53, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 58.05, - "MasakhaNEWSClusteringP2P (eng)": 43.8, - "MasakhaNEWSClusteringP2P (fra-Latn)": 58.28, - "MasakhaNEWSClusteringP2P (hau-Latn)": 44.78, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 44.97, - "MasakhaNEWSClusteringP2P (lin-Latn)": 48.08, - "MasakhaNEWSClusteringP2P (lug-Latn)": 50.15, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 38.02, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 71.03, - "MasakhaNEWSClusteringP2P (run-Latn)": 58.28, - "MasakhaNEWSClusteringP2P (sna-Latn)": 59.25, - "MasakhaNEWSClusteringP2P (som-Latn)": 37.27, - "MasakhaNEWSClusteringP2P (swa-Latn)": 34.54, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 53.44, - "MasakhaNEWSClusteringP2P (xho-Latn)": 40.32, - "MasakhaNEWSClusteringP2P (yor-Latn)": 37.97, - "MasakhaNEWSClusteringP2P (fra)": 47.91, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 49.38, - "MasakhaNEWSClusteringS2S (eng)": 45.76, - "MasakhaNEWSClusteringS2S (fra-Latn)": 55.43, - "MasakhaNEWSClusteringS2S (hau-Latn)": 16.11, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 24.38, - "MasakhaNEWSClusteringS2S (lin-Latn)": 44.8, - "MasakhaNEWSClusteringS2S (lug-Latn)": 45.67, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 26.41, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 83.26, - "MasakhaNEWSClusteringS2S (run-Latn)": 48.77, - "MasakhaNEWSClusteringS2S (sna-Latn)": 43.9, - "MasakhaNEWSClusteringS2S (som-Latn)": 25.43, - "MasakhaNEWSClusteringS2S (swa-Latn)": 9.87, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 51.66, - "MasakhaNEWSClusteringS2S (xho-Latn)": 29.65, - "MasakhaNEWSClusteringS2S (yor-Latn)": 30.12, - "MasakhaNEWSClusteringS2S (fra)": 51.16, - "MedrxivClusteringP2P": 30.6, - "MedrxivClusteringS2S": 28.73, - "RedditClustering": 43.15, - "RedditClusteringP2P": 61.69, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 51.56, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.79, - "StackExchangeClustering": 55.31, - "StackExchangeClusteringP2P": 33.51, - "ThuNewsClusteringP2P": 40.98, - "ThuNewsClusteringS2S": 52.36, - "TwentyNewsgroupsClustering": 35.55 + "Model": "gtr-t5-large", + "ArxivClusteringP2P": 37.5, + "ArxivClusteringS2S": 30.55, + "BiorxivClusteringP2P": 29.59, + "BiorxivClusteringS2S": 25.72, + "MedrxivClusteringP2P": 28.72, + "MedrxivClusteringS2S": 27.39, + "RedditClustering": 61.69, + "RedditClusteringP2P": 61.67, + "StackExchangeClustering": 69.93, + "StackExchangeClusteringP2P": 33.21, + "TwentyNewsgroupsClustering": 51.64 } - ] - }, - "PairClassification": { - "max_ap": [ - { - "Model": "multilingual-e5-base", - "CDSC-E (pol-Latn)": 72.7, - "OpusparcusPC (deu-Latn)": 95.83, - "OpusparcusPC (en)": 98.71, - "OpusparcusPC (fin-Latn)": 90.3, - "OpusparcusPC (fra-Latn)": 92.12, - "OpusparcusPC (rus-Cyrl)": 86.82, - "OpusparcusPC (swe-Latn)": 93.05, - "PSC (pol-Latn)": 99.14, - "PawsXPairClassification (deu-Latn)": 54.11, - "PawsXPairClassification (en)": 55.79, - "PawsXPairClassification (spa-Latn)": 54.13, - "PawsXPairClassification (fra-Latn)": 56.01, - "PawsXPairClassification (jpn-Hira)": 49.02, - "PawsXPairClassification (kor-Hang)": 51.01, - "PawsXPairClassification (cmn-Hans)": 55.13, - "SICK-E-PL (pol-Latn)": 68.76, - "SprintDuplicateQuestions": 93.02, - "TERRa (rus-Cyrl)": 54.96, - "TwitterSemEval2015": 72.21, - "TwitterURLCorpus": 85.48 - }, - { - "Model": "multilingual-e5-base", - "CDSC-E (pol-Latn)": 72.7, - "CDSC-E": 72.67, - "Cmnli": 74.98, - "Ocnli": 60.47, - "OpusparcusPC (deu-Latn)": 95.83, - "OpusparcusPC (en)": 98.71, - "OpusparcusPC (fin-Latn)": 90.3, - "OpusparcusPC (fra-Latn)": 92.12, - "OpusparcusPC (rus-Cyrl)": 86.82, - "OpusparcusPC (swe-Latn)": 93.05, - "OpusparcusPC (fr)": 92.73, - "PPC": 88.01, - "PSC (pol-Latn)": 99.14, - "PSC": 99.14, - "PawsXPairClassification (deu-Latn)": 54.26, - "PawsXPairClassification (en)": 55.79, - "PawsXPairClassification (spa-Latn)": 54.13, - "PawsXPairClassification (fra-Latn)": 56.07, - "PawsXPairClassification (jpn-Hira)": 49.15, - "PawsXPairClassification (kor-Hang)": 51.01, - "PawsXPairClassification (cmn-Hans)": 55.13, - "PawsXPairClassification (fr)": 56.93, - "SICK-E-PL (pol-Latn)": 68.76, - "SICK-E-PL": 68.77, - "SprintDuplicateQuestions": 93.02, - "TERRa (rus-Cyrl)": 54.98, - "TwitterSemEval2015": 72.21, - "TwitterURLCorpus": 85.48 + ] + }, + "PairClassification": { + "max_ap": [ + { + "Model": "gtr-t5-large", + "SprintDuplicateQuestions": 95.05, + "TwitterSemEval2015": 76.03, + "TwitterURLCorpus": 84.89 }, { - "Model": "multilingual-e5-base", - "CDSC-E": 72.67, - "Cmnli": 74.51, - "Ocnli": 59.63, - "OpusparcusPC (fr)": 92.72, - "PPC": 88.01, - "PSC": 99.14, - "PawsXPairClassification (fr)": 56.93, - "SICK-E-PL": 68.77 + "Model": "gtr-t5-large", + "SprintDuplicateQuestions": 95.05, + "TwitterSemEval2015": 76.03, + "TwitterURLCorpus": 84.89 } ] }, "Reranking": { "map": [ { - "Model": "multilingual-e5-base", - "AlloprofReranking (fra-Latn)": 65.9, - "AlloprofReranking": 58.1, - "AskUbuntuDupQuestions": 59.28, - "CMedQAv1": 65.21, - "CMedQAv2": 66.06, - "MMarcoReranking (cmn-Hans)": 30.52, - "MMarcoReranking": 21.76, - "MindSmallReranking": 29.28, - "RuBQReranking (rus-Cyrl)": 72.01, - "SciDocsRR": 81.81, - "StackOverflowDupQuestions": 49.75, - "SyntecReranking (fra-Latn)": 85.31, - "SyntecReranking": 85.43, - "T2Reranking (cmn-Hans)": 64.86, - "T2Reranking": 64.39 - }, - { - "Model": "multilingual-e5-base", - "MIRACLReranking (rus-Cyrl)": 60.47 + "Model": "gtr-t5-large", + "AskUbuntuDupQuestions": 61.64, + "MindSmallReranking": 31.84, + "SciDocsRR": 76.39, + "StackOverflowDupQuestions": 51.58 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "multilingual-e5-base", - "AILACasedocs": 26.05, - "AILAStatutes": 20.37, - "ARCChallenge": 9.61, - "AlloprofRetrieval (fra-Latn)": 34.45, - "AlloprofRetrieval": 36.21, - "AlphaNLI": 16.44, - "AppsRetrieval (eng-Latn_python-Code)": 20.94, - "ArguAna": 44.21, - "ArguAna-PL (pol-Latn)": 42.86, - "ArguAna-PL": 42.81, - "BSARDRetrieval (fra-Latn)": 18.83, - "BSARDRetrieval": 0.0, - "CmedqaRetrieval (cmn-Hans)": 27.2, - "CmedqaRetrieval": 27.2, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 43.18, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 72.61, - "CodeSearchNetCCRetrieval (python-Code)": 85.37, - "CodeSearchNetCCRetrieval (javascript-Code)": 78.44, - "CodeSearchNetCCRetrieval (go-Code)": 72.29, - "CodeSearchNetCCRetrieval (ruby-Code)": 81.92, - "CodeSearchNetCCRetrieval (java-Code)": 78.85, - "CodeSearchNetCCRetrieval (php-Code)": 72.02, - "CodeSearchNetRetrieval (python-Code)": 85.68, - "CodeSearchNetRetrieval (javascript-Code)": 71.4, - "CodeSearchNetRetrieval (go-Code)": 89.47, - "CodeSearchNetRetrieval (ruby-Code)": 78.24, - "CodeSearchNetRetrieval (java-Code)": 78.39, - "CodeSearchNetRetrieval (php-Code)": 81.05, - "CodeTransOceanContest (python-Code_c++-Code)": 51.89, - "CodeTransOceanDL": 29.88, - "CosQA (eng-Latn_python-Code)": 31.12, - "CovidRetrieval (cmn-Hans)": 73.48, - "CovidRetrieval": 73.45, - "DBPedia-PL": 30.23, - "DuRetrieval (cmn-Hans)": 81.66, - "DuRetrieval": 81.64, - "EcomRetrieval (cmn-Hans)": 54.01, - "EcomRetrieval": 54.17, - "FiQA-PL (pol-Latn)": 25.59, - "FiQA-PL": 25.52, - "FiQA2018": 38.15, - "GerDaLIRSmall (deu-Latn)": 15.3, - "HellaSwag": 24.79, - "HotpotQA-PL": 63.52, - "LEMBNarrativeQARetrieval": 23.6, - "LEMBQMSumRetrieval": 25.16, - "LEMBSummScreenFDRetrieval": 68.21, - "LEMBWikimQARetrieval": 56.04, - "LeCaRDv2 (zho-Hans)": 59.0, - "LegalBenchConsumerContractsQA": 69.02, - "LegalBenchCorporateLobbying": 88.97, - "LegalQuAD (deu-Latn)": 47.85, - "LegalSummarization": 61.69, - "MIRACLRetrieval (rus-Cyrl)": 61.6, - "MMarcoRetrieval (cmn-Hans)": 76.01, - "MMarcoRetrieval": 76.04, - "MSMARCO-PL": 29.52, - "MedicalRetrieval (cmn-Hans)": 48.33, - "MedicalRetrieval": 48.35, - "MintakaRetrieval (ara-Arab)": 23.06, - "MintakaRetrieval (deu-Latn)": 29.8, - "MintakaRetrieval (spa-Latn)": 29.88, - "MintakaRetrieval (fra-Latn)": 30.96, - "MintakaRetrieval (hin-Deva)": 22.68, - "MintakaRetrieval (ita-Latn)": 29.77, - "MintakaRetrieval (jpn-Hira)": 22.98, - "MintakaRetrieval (por-Latn)": 30.62, - "MintakaRetrieval (fr)": 23.46, - "NFCorpus": 32.49, - "NFCorpus-PL (pol-Latn)": 25.99, - "NFCorpus-PL": 25.98, - "NQ-PL": 44.8, - "PIQA": 25.09, - "Quail": 3.52, - "Quora-PL": 81.22, - "RARbCode": 52.16, - "RARbMath": 65.35, - "RiaNewsRetrieval (rus-Cyrl)": 70.24, - "RuBQRetrieval (rus-Cyrl)": 69.58, - "SCIDOCS": 17.17, - "SCIDOCS-PL (pol-Latn)": 12.36, - "SCIDOCS-PL": 12.35, - "SIQA": 3.72, - "SciFact": 69.39, - "SciFact-PL (pol-Latn)": 62.26, - "SciFact-PL": 62.11, - "SpartQA": 7.91, - "StackOverflowQA": 85.11, - "SyntecRetrieval (fra-Latn)": 82.86, - "SyntecRetrieval": 80.49, - "SyntheticText2SQL (eng-Latn_sql-Code)": 53.61, - "T2Retrieval (cmn-Hans)": 70.77, - "T2Retrieval": 70.86, - "TRECCOVID": 69.5, - "TRECCOVID-PL (pol-Latn)": 65.94, - "TRECCOVID-PL": 66.06, - "TempReasonL1": 0.72, - "TempReasonL2Fact": 38.76, - "TempReasonL2Pure": 1.63, - "TempReasonL3Fact": 35.85, - "TempReasonL3Pure": 7.11, - "Touche2020": 21.5, - "VideoRetrieval (cmn-Hans)": 61.26, - "VideoRetrieval": 61.3, - "WinoGrande": 56.18, - "XPQARetrieval (ara-Arab_ara-Arab)": 39.97, - "XPQARetrieval (eng-Latn_ara-Arab)": 17.23, - "XPQARetrieval (ara-Arab_eng-Latn)": 34.35, - "XPQARetrieval (deu-Latn_deu-Latn)": 72.11, - "XPQARetrieval (eng-Latn_deu-Latn)": 28.91, - "XPQARetrieval (deu-Latn_eng-Latn)": 61.46, - "XPQARetrieval (spa-Latn_spa-Latn)": 58.35, - "XPQARetrieval (eng-Latn_spa-Latn)": 25.27, - "XPQARetrieval (spa-Latn_eng-Latn)": 51.07, - "XPQARetrieval (fra-Latn_fra-Latn)": 59.56, - "XPQARetrieval (eng-Latn_fra-Latn)": 23.69, - "XPQARetrieval (fra-Latn_eng-Latn)": 53.9, - "XPQARetrieval (hin-Deva_hin-Deva)": 70.59, - "XPQARetrieval (eng-Latn_hin-Deva)": 27.57, - "XPQARetrieval (hin-Deva_eng-Latn)": 63.69, - "XPQARetrieval (ita-Latn_ita-Latn)": 70.38, - "XPQARetrieval (eng-Latn_ita-Latn)": 26.06, - "XPQARetrieval (ita-Latn_eng-Latn)": 56.2, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 71.97, - "XPQARetrieval (eng-Latn_jpn-Hira)": 17.63, - "XPQARetrieval (jpn-Hira_eng-Latn)": 61.03, - "XPQARetrieval (kor-Hang_kor-Hang)": 36.12, - "XPQARetrieval (eng-Latn_kor-Hang)": 20.15, - "XPQARetrieval (kor-Hang_eng-Latn)": 29.27, - "XPQARetrieval (pol-Latn_pol-Latn)": 48.05, - "XPQARetrieval (eng-Latn_pol-Latn)": 19.48, - "XPQARetrieval (pol-Latn_eng-Latn)": 40.18, - "XPQARetrieval (por-Latn_por-Latn)": 44.78, - "XPQARetrieval (eng-Latn_por-Latn)": 17.66, - "XPQARetrieval (por-Latn_eng-Latn)": 40.58, - "XPQARetrieval (tam-Taml_tam-Taml)": 35.21, - "XPQARetrieval (eng-Latn_tam-Taml)": 12.64, - "XPQARetrieval (tam-Taml_eng-Latn)": 26.73, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 67.06, - "XPQARetrieval (eng-Latn_cmn-Hans)": 12.72, - "XPQARetrieval (cmn-Hans_eng-Latn)": 53.53, - "XPQARetrieval (fr)": 65.81 - }, - { - "Model": "multilingual-e5-base", - "LEMBNeedleRetrieval": 32.0, - "LEMBPasskeyRetrieval": 38.25 + "Model": "gtr-t5-large", + "ArguAna": 52.09, + "CQADupstackRetrieval": 36.62, + "ClimateFEVER": 26.9, + "DBPedia": 39.55, + "FEVER": 72.66, + "FiQA2018": 42.79, + "HotpotQA": 57.85, + "MSMARCO": 42.73, + "NFCorpus": 32.63, + "NQ": 55.09, + "QuoraRetrieval": 88.47, + "SCIDOCS": 15.51, + "SciFact": 63.42, + "TRECCOVID": 56.68, + "Touche2020": 28.29 } ] }, "STS": { "cosine_spearman": [ { - "Model": "multilingual-e5-base", - "AFQMC (cmn-Hans)": 29.66, - "ATEC (cmn-Hans)": 37.01, - "BIOSSES": 85.05, - "BQ (cmn-Hans)": 45.45, - "CDSC-R (pol-Latn)": 90.09, - "LCQMC (cmn-Hans)": 74.15, - "PAWSX (cmn-Hans)": 12.13, - "RUParaPhraserSTS (rus-Cyrl)": 70.17, - "RuSTSBenchmarkSTS (rus-Cyrl)": 79.64, - "SICK-R": 78.51, - "SICK-R-PL (pol-Latn)": 71.23, - "SICKFr (fra-Latn)": 75.76, - "STS12": 76.7, - "STS13": 78.02, - "STS14": 76.6, - "STS15": 88.16, - "STS16": 84.28, - "STS17 (eng-Latn_deu-Latn)": 82.08, - "STS17 (spa-Latn_eng-Latn)": 76.56, - "STS17 (fra-Latn_eng-Latn)": 80.18, - "STS17 (eng-Latn_tur-Latn)": 63.3, - "STS17 (kor-Hang)": 79.95, - "STS17 (eng-Latn_ara-Arab)": 71.27, - "STS17 (en-en)": 87.84, - "STS17 (spa-Latn)": 86.74, - "STS17 (ara-Arab)": 74.48, - "STS17 (ita-Latn_eng-Latn)": 80.16, - "STS17 (nld-Latn_eng-Latn)": 79.29, - "STS22 (fra-Latn)": 75.04, - "STS22 (cmn-Hans_eng-Latn)": 69.8, - "STS22 (ara-Arab)": 57.82, - "STS22 (spa-Latn_ita-Latn)": 66.43, - "STS22 (ita-Latn)": 77.76, - "STS22 (fra-Latn_pol-Latn)": 73.25, - "STS22 (deu-Latn_eng-Latn)": 54.89, - "STS22 (pol-Latn_eng-Latn)": 70.37, - "STS22 (cmn-Hans)": 65.63, - "STS22 (deu-Latn)": 55.95, - "STS22 (pol-Latn)": 34.08, - "STS22 (deu-Latn_fra-Latn)": 59.68, - "STS22 (spa-Latn)": 66.67, - "STS22 (rus-Cyrl)": 60.67, - "STS22 (spa-Latn_eng-Latn)": 74.0, - "STS22 (en)": 62.26, - "STS22 (tur-Latn)": 63.71, - "STS22 (deu-Latn_pol-Latn)": 39.35, - "STSB (cmn-Hans)": 79.04, - "STSBenchmark": 85.64, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 79.87, - "STSBenchmarkMultilingualSTS (deu-Latn)": 79.68, - "STSBenchmarkMultilingualSTS (spa-Latn)": 81.75, - "STSBenchmarkMultilingualSTS (en)": 85.64, - "STSBenchmarkMultilingualSTS (fra-Latn)": 80.85, - "STSBenchmarkMultilingualSTS (pol-Latn)": 74.93, - "STSBenchmarkMultilingualSTS (por-Latn)": 67.16, - "STSBenchmarkMultilingualSTS (nld-Latn)": 75.96, - "STSBenchmarkMultilingualSTS (ita-Latn)": 78.09, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 79.33 - }, - { - "Model": "multilingual-e5-base", - "AFQMC (cmn-Hans)": 29.66, - "ATEC (cmn-Hans)": 37.01, - "BIOSSES": 85.05, - "BQ (cmn-Hans)": 45.45, - "CDSC-R (pol-Latn)": 90.09, - "LCQMC (cmn-Hans)": 74.15, - "PAWSX (cmn-Hans)": 12.13, - "RUParaPhraserSTS (rus-Cyrl)": 70.17, - "RuSTSBenchmarkSTS (rus-Cyrl)": 79.64, - "SICK-R": 78.51, - "SICK-R-PL (pol-Latn)": 71.23, - "SICKFr (fra-Latn)": 75.76, - "STS12": 76.7, - "STS13": 78.02, - "STS14": 76.6, - "STS15": 88.16, - "STS16": 84.28, - "STS17 (eng-Latn_deu-Latn)": 82.08, - "STS17 (spa-Latn_eng-Latn)": 76.56, - "STS17 (fra-Latn_eng-Latn)": 80.18, - "STS17 (eng-Latn_tur-Latn)": 63.3, - "STS17 (kor-Hang)": 79.95, - "STS17 (eng-Latn_ara-Arab)": 71.27, - "STS17 (en-en)": 87.84, - "STS17 (spa-Latn)": 86.74, - "STS17 (ara-Arab)": 74.48, - "STS17 (ita-Latn_eng-Latn)": 80.16, - "STS17 (nld-Latn_eng-Latn)": 79.29, - "STS22 (fra-Latn)": 75.04, - "STS22 (cmn-Hans_eng-Latn)": 69.8, - "STS22 (ara-Arab)": 57.82, - "STS22 (spa-Latn_ita-Latn)": 66.43, - "STS22 (ita-Latn)": 77.76, - "STS22 (fra-Latn_pol-Latn)": 73.25, - "STS22 (deu-Latn_eng-Latn)": 54.89, - "STS22 (pol-Latn_eng-Latn)": 70.37, - "STS22 (cmn-Hans)": 65.63, - "STS22 (deu-Latn)": 55.95, - "STS22 (pol-Latn)": 34.08, - "STS22 (deu-Latn_fra-Latn)": 59.68, - "STS22 (spa-Latn)": 66.67, - "STS22 (rus-Cyrl)": 60.67, - "STS22 (spa-Latn_eng-Latn)": 74.0, - "STS22 (en)": 62.26, - "STS22 (tur-Latn)": 63.71, - "STS22 (deu-Latn_pol-Latn)": 39.35, - "STSB (cmn-Hans)": 79.04, - "STSBenchmark": 85.64, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 79.87, - "STSBenchmarkMultilingualSTS (deu-Latn)": 79.68, - "STSBenchmarkMultilingualSTS (spa-Latn)": 81.75, - "STSBenchmarkMultilingualSTS (en)": 85.64, - "STSBenchmarkMultilingualSTS (fra-Latn)": 80.85, - "STSBenchmarkMultilingualSTS (pol-Latn)": 74.93, - "STSBenchmarkMultilingualSTS (por-Latn)": 67.16, - "STSBenchmarkMultilingualSTS (nld-Latn)": 75.96, - "STSBenchmarkMultilingualSTS (ita-Latn)": 78.09, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 79.33 - }, - { - "Model": "multilingual-e5-base", - "AFQMC": 29.67, - "ATEC": 37.01, - "BQ": 45.45, - "CDSC-R": 90.08, - "LCQMC": 74.15, - "PAWSX": 12.14, - "QBQTC": 28.81, - "SICK-R-PL": 71.23, - "SICKFr": 76.23, - "STS22 (zh)": 65.64, - "STS22 (pl)": 34.07, - "STSB": 79.05, - "STSBenchmarkMultilingualSTS (fr)": 80.62 + "Model": "gtr-t5-large", + "BIOSSES": 84.86, + "SICK-R": 73.39, + "STS12": 70.33, + "STS13": 82.19, + "STS14": 77.16, + "STS15": 86.31, + "STS16": 81.85, + "STS17": 47.48, + "STS22": 29.42, + "STSBenchmark": 77.6 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "multilingual-e5-base", - "SummEval": 30.23, - "SummEvalFr (fra-Latn)": 32.96 - }, - { - "Model": "multilingual-e5-base", - "SummEval": 30.23, - "SummEvalFr (fra-Latn)": 32.96 - }, - { - "Model": "multilingual-e5-base", - "SummEvalFr": 30.76 + "Model": "gtr-t5-large", + "SummEval": 29.5 } ] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "multilingual-e5-base", - "CEDRClassification (rus-Cyrl)": 42.32, - "SensitiveTopicsClassification (rus-Cyrl)": 24.98 - } - ] + "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "multilingual-e5-base", - "Core17InstructionRetrieval": 0.61, - "News21InstructionRetrieval": -1.14, - "Robust04InstructionRetrieval": -7.43 - } - ] + "p-MRR": [] } }, - "bert-base-25lang-cased": { + "gtr-t5-xl": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "gtr-t5-xl", + "BUCC": 1.49, + "Tatoeba": 29.74 + } + ] }, "Classification": { "accuracy": [ { - "Model": "bert-base-25lang-cased", - "AmazonReviewsClassification (fr)": 29.39, - "MTOPDomainClassification (fr)": 63.63, - "MTOPIntentClassification (fr)": 37.86, - "MasakhaNEWSClassification (fra)": 63.91, - "MassiveIntentClassification (fr)": 37.3, - "MassiveScenarioClassification (fr)": 44.47 + "Model": "gtr-t5-xl", + "AmazonCounterfactualClassification": 50.59, + "AmazonPolarityClassification": 74.58, + "AmazonReviewsClassification": 21.89, + "Banking77Classification": 82.22, + "EmotionClassification": 45.54, + "ImdbClassification": 68.15, + "MTOPDomainClassification": 15.87, + "MTOPIntentClassification": 5.55, + "MassiveIntentClassification": 3.35, + "MassiveScenarioClassification": 8.77, + "ToxicConversationsClassification": 67.56, + "TweetSentimentExtractionClassification": 54.77 } ] }, "Clustering": { "v_measure": [ { - "Model": "bert-base-25lang-cased", - "AlloProfClusteringP2P": 53.49, - "AlloProfClusteringS2S": 43.1, - "HALClusteringS2S": 19.78, - "MLSUMClusteringP2P": 40.73, - "MLSUMClusteringS2S": 31.94, - "MasakhaNEWSClusteringP2P (fra)": 24.23, - "MasakhaNEWSClusteringS2S (fra)": 24.46 + "Model": "gtr-t5-xl", + "ArxivClusteringP2P": 37.9, + "ArxivClusteringS2S": 30.45, + "BiorxivClusteringP2P": 30.52, + "BiorxivClusteringS2S": 26.06, + "MedrxivClusteringP2P": 28.69, + "MedrxivClusteringS2S": 26.69, + "RedditClustering": 61.34, + "RedditClusteringP2P": 61.11, + "StackExchangeClustering": 69.95, + "StackExchangeClusteringP2P": 32.73, + "TwentyNewsgroupsClustering": 51.15 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bert-base-25lang-cased", - "OpusparcusPC (fr)": 86.79, - "PawsXPairClassification (fr)": 53.39 + "Model": "gtr-t5-xl", + "SprintDuplicateQuestions": 95.45, + "TwitterSemEval2015": 77.81, + "TwitterURLCorpus": 85.14 }, { - "Model": "bert-base-25lang-cased", - "OpusparcusPC (fr)": 87.78, - "PawsXPairClassification (fr)": 53.4 + "Model": "gtr-t5-xl", + "SprintDuplicateQuestions": 95.45, + "TwitterSemEval2015": 77.81, + "TwitterURLCorpus": 85.14 } ] }, "Reranking": { "map": [ { - "Model": "bert-base-25lang-cased", - "AlloprofReranking": 36.25, - "SyntecReranking": 53.25 + "Model": "gtr-t5-xl", + "AskUbuntuDupQuestions": 63.08, + "MindSmallReranking": 31.5, + "SciDocsRR": 76.49, + "StackOverflowDupQuestions": 52.79 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bert-base-25lang-cased", - "AlloprofRetrieval": 1.6, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 3.55, - "SyntecRetrieval": 18.95, - "XPQARetrieval (fr)": 18.46 + "Model": "gtr-t5-xl", + "ArguAna": 52.81, + "CQADupstackRetrieval": 37.35, + "ClimateFEVER": 27.01, + "DBPedia": 39.74, + "FEVER": 72.18, + "FiQA2018": 44.19, + "HotpotQA": 58.91, + "MSMARCO": 43.52, + "NFCorpus": 33.34, + "NQ": 56.16, + "QuoraRetrieval": 88.91, + "SCIDOCS": 15.71, + "SciFact": 64.2, + "TRECCOVID": 60.09, + "Touche2020": 25.26 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bert-base-25lang-cased", - "SICKFr": 58.76, - "STS22 (fr)": 38.77, - "STSBenchmarkMultilingualSTS (fr)": 52.25 + "Model": "gtr-t5-xl", + "BIOSSES": 78.94, + "SICK-R": 73.63, + "STS12": 69.11, + "STS13": 81.82, + "STS14": 77.07, + "STS15": 86.01, + "STS16": 82.23, + "STS17": 56.91, + "STS22": 28.85, + "STSBenchmark": 77.65 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "bert-base-25lang-cased", - "SummEvalFr": 28.84 + "Model": "gtr-t5-xl", + "SummEval": 30.21 } ] }, @@ -12266,110 +11316,121 @@ "p-MRR": [] } }, - "text-similarity-davinci-001": { + "gtr-t5-xxl": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "gtr-t5-xxl", + "AmazonCounterfactualClassification": 67.3, + "AmazonPolarityClassification": 75.05, + "AmazonReviewsClassification": 37.3, + "Banking77Classification": 82.32, + "EmotionClassification": 43.19, + "ImdbClassification": 70.8, + "MTOPDomainClassification": 93.84, + "MTOPIntentClassification": 67.71, + "MassiveIntentClassification": 70.61, + "MassiveScenarioClassification": 77.77, + "ToxicConversationsClassification": 68.48, + "TweetSentimentExtractionClassification": 54.54 + } + ] }, "Clustering": { "v_measure": [ { - "Model": "text-similarity-davinci-001", - "RedditClustering": 31.78, - "StackExchangeClustering": 36.86, - "TwentyNewsgroupsClustering": 29.33 + "Model": "gtr-t5-xxl", + "ArxivClusteringP2P": 37.9, + "ArxivClusteringS2S": 32.39, + "BiorxivClusteringP2P": 30.48, + "BiorxivClusteringS2S": 27.5, + "MedrxivClusteringP2P": 29.12, + "MedrxivClusteringS2S": 27.56, + "RedditClustering": 64.13, + "RedditClusteringP2P": 62.84, + "StackExchangeClustering": 71.43, + "StackExchangeClusteringP2P": 32.85, + "TwentyNewsgroupsClustering": 50.44 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-similarity-davinci-001", - "SprintDuplicateQuestions": 69.52, - "TwitterSemEval2015": 74.42, - "TwitterURLCorpus": 83.75 + "Model": "gtr-t5-xxl", + "SprintDuplicateQuestions": 95.68, + "TwitterSemEval2015": 77.54, + "TwitterURLCorpus": 85.13 + }, + { + "Model": "gtr-t5-xxl", + "SprintDuplicateQuestions": 95.68, + "TwitterSemEval2015": 77.54, + "TwitterURLCorpus": 85.13 } ] }, "Reranking": { "map": [ { - "Model": "text-similarity-davinci-001", - "AskUbuntuDupQuestions": 53.56, - "SciDocsRR": 68.7, - "StackOverflowDupQuestions": 39.41 + "Model": "gtr-t5-xxl", + "AskUbuntuDupQuestions": 63.23, + "MindSmallReranking": 31.93, + "SciDocsRR": 77.96, + "StackOverflowDupQuestions": 53.5 } ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "gtr-t5-xxl", + "ArguAna": 53.77, + "CQADupstackRetrieval": 38.56, + "ClimateFEVER": 27.21, + "DBPedia": 41.28, + "FEVER": 74.08, + "FiQA2018": 46.78, + "HotpotQA": 59.67, + "MSMARCO": 44.05, + "NFCorpus": 34.18, + "NQ": 57.24, + "QuoraRetrieval": 89.09, + "SCIDOCS": 15.88, + "SciFact": 66.77, + "TRECCOVID": 51.9, + "Touche2020": 26.76 + } + ] }, "STS": { "cosine_spearman": [ { - "Model": "text-similarity-davinci-001", - "BIOSSES": 68.95, - "SICK-R": 78.72, - "STSBenchmark": 84.08 + "Model": "gtr-t5-xxl", + "BIOSSES": 81.91, + "SICK-R": 74.29, + "STS12": 70.12, + "STS13": 82.72, + "STS14": 78.24, + "STS15": 86.26, + "STS16": 81.61, + "STS17": 85.18, + "STS22": 65.76, + "STSBenchmark": 77.73 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "bge-m3-instruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "cosine_spearman": [ { - "Model": "bge-m3-instruct", - "ARCChallenge": 9.03, - "AlphaNLI": 24.69, - "HellaSwag": 25.55, - "PIQA": 19.03, - "Quail": 7.08, - "RARbCode": 39.58, - "RARbMath": 64.51, - "SIQA": 4.77, - "SpartQA": 7.0, - "TempReasonL1": 0.8, - "TempReasonL2Fact": 34.99, - "TempReasonL2Pure": 0.62, - "TempReasonL3Fact": 32.47, - "TempReasonL3Pure": 7.01, - "WinoGrande": 35.33 + "Model": "gtr-t5-xxl", + "SummEval": 30.64 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, @@ -12377,120 +11438,83 @@ "p-MRR": [] } }, - "bert-base-uncased": { + "herbert-base-retrieval-v2": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "bert-base-uncased", - "AmazonCounterfactualClassification (en)": 74.25, - "AmazonPolarityClassification": 71.33, - "AmazonReviewsClassification (en)": 33.56, - "Banking77Classification": 63.41, - "EmotionClassification": 35.28, - "ImdbClassification": 65.35, - "MTOPDomainClassification (en)": 82.63, - "MTOPIntentClassification (en)": 68.14, - "MassiveIntentClassification (en)": 59.88, - "MassiveScenarioClassification (en)": 64.28, - "ToxicConversationsClassification": 70.0, - "TweetSentimentExtractionClassification": 51.81 + "Model": "herbert-base-retrieval-v2", + "AllegroReviews": 34.11, + "CBD": 68.35, + "MassiveIntentClassification": 65.53, + "MassiveScenarioClassification": 68.51, + "PAC": 68.4, + "PolEmo2.0-IN": 64.18, + "PolEmo2.0-OUT": 45.73 } ] }, "Clustering": { "v_measure": [ { - "Model": "bert-base-uncased", - "ArxivClusteringP2P": 35.19, - "ArxivClusteringS2S": 27.51, - "BiorxivClusteringP2P": 30.12, - "BiorxivClusteringS2S": 24.77, - "MedrxivClusteringP2P": 26.09, - "MedrxivClusteringS2S": 23.6, - "RedditClustering": 27.24, - "RedditClusteringP2P": 43.32, - "StackExchangeClustering": 43.58, - "StackExchangeClusteringP2P": 26.55, - "TwentyNewsgroupsClustering": 23.35 + "Model": "herbert-base-retrieval-v2", + "8TagsClustering": 28.15 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bert-base-uncased", - "SprintDuplicateQuestions": 36.81, - "TwitterSemEval2015": 55.9, - "TwitterURLCorpus": 76.29 + "Model": "herbert-base-retrieval-v2", + "CDSC-E": 63.31, + "PPC": 84.18, + "PSC": 98.87, + "SICK-E-PL": 54.93 }, { - "Model": "bert-base-uncased", - "SprintDuplicateQuestions": 36.81, - "TwitterSemEval2015": 55.9, - "TwitterURLCorpus": 76.29 + "Model": "herbert-base-retrieval-v2", + "CDSC-E": 63.61, + "PPC": 84.37, + "PSC": 98.98, + "SICK-E-PL": 54.95 } ] }, "Reranking": { - "map": [ - { - "Model": "bert-base-uncased", - "AskUbuntuDupQuestions": 45.84, - "MindSmallReranking": 28.37, - "SciDocsRR": 64.94, - "StackOverflowDupQuestions": 34.62 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bert-base-uncased", - "ArguAna": 28.29, - "CQADupstackRetrieval": 5.51, - "ClimateFEVER": 5.41, - "DBPedia": 4.13, - "FEVER": 3.3, - "FiQA2018": 2.19, - "HotpotQA": 8.26, - "MSMARCO": 1.91, - "NFCorpus": 4.3, - "NQ": 2.62, - "QuoraRetrieval": 61.03, - "SCIDOCS": 2.82, - "SciFact": 13.34, - "TRECCOVID": 14.74, - "Touche2020": 0.97 + "Model": "herbert-base-retrieval-v2", + "ArguAna-PL": 41.97, + "DBPedia-PL": 24.07, + "FiQA-PL": 24.25, + "HotpotQA-PL": 43.41, + "MSMARCO-PL": 51.56, + "NFCorpus-PL": 25.95, + "NQ-PL": 35.09, + "Quora-PL": 78.86, + "SCIDOCS-PL": 11.0, + "SciFact-PL": 51.92, + "TRECCOVID-PL": 42.64 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bert-base-uncased", - "BIOSSES": 54.7, - "SICK-R": 58.65, - "STS12": 30.87, - "STS13": 59.89, - "STS14": 47.73, - "STS15": 60.29, - "STS16": 63.73, - "STS17 (en-en)": 64.1, - "STS22 (en)": 56.37, - "STSBenchmark": 47.29 + "Model": "herbert-base-retrieval-v2", + "CDSC-R": 86.18, + "SICK-R-PL": 64.67, + "STS22": 39.73 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "bert-base-uncased", - "SummEval": 29.82 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -12499,7 +11523,7 @@ "p-MRR": [] } }, - "gelectra-large": { + "instructor-base": { "BitextMining": { "f1": [] }, @@ -12507,15 +11531,7 @@ "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "gelectra-large", - "BlurbsClusteringP2P": 13.96, - "BlurbsClusteringS2S": 7.57, - "TenKGnadClusteringP2P": 11.49, - "TenKGnadClusteringS2S": 3.91 - } - ] + "v_measure": [] }, "PairClassification": { "max_ap": [] @@ -12536,414 +11552,69 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] - } - }, - "sentence-t5-large": { - "BitextMining": { - "f1": [ + "p-MRR": [ { - "Model": "sentence-t5-large", - "BUCC (de-en)": 87.0, - "BUCC (fr-en)": 88.91, - "BUCC (ru-en)": 0.44, - "BUCC (zh-en)": 0.95, - "Tatoeba (afr-eng)": 23.7, - "Tatoeba (amh-eng)": 0.65, - "Tatoeba (ang-eng)": 30.98, - "Tatoeba (ara-eng)": 0.48, - "Tatoeba (arq-eng)": 0.68, - "Tatoeba (arz-eng)": 0.22, - "Tatoeba (ast-eng)": 55.3, - "Tatoeba (awa-eng)": 1.03, - "Tatoeba (aze-eng)": 5.83, - "Tatoeba (bel-eng)": 1.66, - "Tatoeba (ben-eng)": 0.0, - "Tatoeba (ber-eng)": 5.62, - "Tatoeba (bos-eng)": 12.23, - "Tatoeba (bre-eng)": 5.84, - "Tatoeba (bul-eng)": 1.35, - "Tatoeba (cat-eng)": 48.56, - "Tatoeba (cbk-eng)": 46.97, - "Tatoeba (ceb-eng)": 9.79, - "Tatoeba (ces-eng)": 6.0, - "Tatoeba (cha-eng)": 24.21, - "Tatoeba (cmn-eng)": 2.26, - "Tatoeba (cor-eng)": 4.03, - "Tatoeba (csb-eng)": 9.53, - "Tatoeba (cym-eng)": 9.17, - "Tatoeba (dan-eng)": 34.63, - "Tatoeba (deu-eng)": 89.31, - "Tatoeba (dsb-eng)": 9.68, - "Tatoeba (dtp-eng)": 4.66, - "Tatoeba (ell-eng)": 0.77, - "Tatoeba (epo-eng)": 26.88, - "Tatoeba (est-eng)": 5.19, - "Tatoeba (eus-eng)": 9.46, - "Tatoeba (fao-eng)": 21.59, - "Tatoeba (fin-eng)": 5.66, - "Tatoeba (fra-eng)": 79.71, - "Tatoeba (fry-eng)": 28.29, - "Tatoeba (gla-eng)": 2.34, - "Tatoeba (gle-eng)": 3.55, - "Tatoeba (glg-eng)": 56.25, - "Tatoeba (gsw-eng)": 24.25, - "Tatoeba (heb-eng)": 0.57, - "Tatoeba (hin-eng)": 0.12, - "Tatoeba (hrv-eng)": 10.29, - "Tatoeba (hsb-eng)": 9.52, - "Tatoeba (hun-eng)": 6.22, - "Tatoeba (hye-eng)": 0.81, - "Tatoeba (ido-eng)": 41.11, - "Tatoeba (ile-eng)": 54.0, - "Tatoeba (ina-eng)": 75.47, - "Tatoeba (ind-eng)": 13.02, - "Tatoeba (isl-eng)": 8.98, - "Tatoeba (ita-eng)": 67.23, - "Tatoeba (jav-eng)": 8.54, - "Tatoeba (jpn-eng)": 0.99, - "Tatoeba (kab-eng)": 1.85, - "Tatoeba (kat-eng)": 1.37, - "Tatoeba (kaz-eng)": 0.67, - "Tatoeba (khm-eng)": 0.56, - "Tatoeba (kor-eng)": 1.73, - "Tatoeba (kur-eng)": 9.23, - "Tatoeba (kzj-eng)": 5.38, - "Tatoeba (lat-eng)": 21.3, - "Tatoeba (lfn-eng)": 40.48, - "Tatoeba (lit-eng)": 5.38, - "Tatoeba (lvs-eng)": 6.83, - "Tatoeba (mal-eng)": 0.45, - "Tatoeba (mar-eng)": 0.01, - "Tatoeba (max-eng)": 16.44, - "Tatoeba (mhr-eng)": 0.33, - "Tatoeba (mkd-eng)": 0.4, - "Tatoeba (mon-eng)": 2.48, - "Tatoeba (nds-eng)": 34.66, - "Tatoeba (nld-eng)": 42.72, - "Tatoeba (nno-eng)": 24.08, - "Tatoeba (nob-eng)": 34.17, - "Tatoeba (nov-eng)": 55.01, - "Tatoeba (oci-eng)": 29.15, - "Tatoeba (orv-eng)": 0.2, - "Tatoeba (pam-eng)": 6.99, - "Tatoeba (pes-eng)": 0.9, - "Tatoeba (pms-eng)": 30.8, - "Tatoeba (pol-eng)": 12.81, - "Tatoeba (por-eng)": 73.45, - "Tatoeba (ron-eng)": 54.86, - "Tatoeba (rus-eng)": 2.43, - "Tatoeba (slk-eng)": 8.35, - "Tatoeba (slv-eng)": 9.3, - "Tatoeba (spa-eng)": 78.87, - "Tatoeba (sqi-eng)": 11.74, - "Tatoeba (srp-eng)": 5.83, - "Tatoeba (swe-eng)": 35.41, - "Tatoeba (swg-eng)": 28.18, - "Tatoeba (swh-eng)": 7.53, - "Tatoeba (tam-eng)": 0.36, - "Tatoeba (tat-eng)": 1.01, - "Tatoeba (tel-eng)": 1.1, - "Tatoeba (tgl-eng)": 12.4, - "Tatoeba (tha-eng)": 1.58, - "Tatoeba (tuk-eng)": 4.95, - "Tatoeba (tur-eng)": 6.45, - "Tatoeba (tzl-eng)": 37.82, - "Tatoeba (uig-eng)": 0.67, - "Tatoeba (ukr-eng)": 1.88, - "Tatoeba (urd-eng)": 0.0, - "Tatoeba (uzb-eng)": 4.79, - "Tatoeba (vie-eng)": 7.03, - "Tatoeba (war-eng)": 9.68, - "Tatoeba (wuu-eng)": 1.28, - "Tatoeba (xho-eng)": 10.64, - "Tatoeba (yid-eng)": 0.57, - "Tatoeba (yue-eng)": 0.88, - "Tatoeba (zsm-eng)": 14.67 + "Model": "instructor-base", + "Core17InstructionRetrieval": -1.09, + "News21InstructionRetrieval": -1.78, + "Robust04InstructionRetrieval": -10.42 } ] + } + }, + "instructor-large": { + "BitextMining": { + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "sentence-t5-large", - "AmazonCounterfactualClassification (de)": 67.97, - "AmazonCounterfactualClassification (en)": 75.51, - "AmazonCounterfactualClassification (en-ext)": 75.44, - "AmazonCounterfactualClassification (ja)": 45.72, - "AmazonPolarityClassification": 92.87, - "AmazonReviewsClassification (de)": 43.16, - "AmazonReviewsClassification (en)": 47.12, - "AmazonReviewsClassification (es)": 42.89, - "AmazonReviewsClassification (fr)": 41.48, - "AmazonReviewsClassification (ja)": 22.49, - "AmazonReviewsClassification (zh)": 22.12, - "Banking77Classification": 78.46, - "EmotionClassification": 51.74, - "ImdbClassification": 87.01, - "MTOPDomainClassification (de)": 80.56, - "MTOPDomainClassification (en)": 90.99, - "MTOPDomainClassification (es)": 80.78, - "MTOPDomainClassification (fr)": 79.6, - "MTOPDomainClassification (hi)": 21.22, - "MTOPDomainClassification (th)": 15.82, - "MTOPIntentClassification (de)": 52.5, - "MTOPIntentClassification (en)": 64.98, - "MTOPIntentClassification (es)": 52.07, - "MTOPIntentClassification (fr)": 47.73, - "MTOPIntentClassification (hi)": 3.74, - "MTOPIntentClassification (th)": 4.96, - "MasakhaNEWSClassification (fra)": 80.43, - "MassiveIntentClassification (af)": 38.41, - "MassiveIntentClassification (am)": 2.49, - "MassiveIntentClassification (ar)": 4.7, - "MassiveIntentClassification (az)": 31.77, - "MassiveIntentClassification (bn)": 2.77, - "MassiveIntentClassification (cy)": 31.69, - "MassiveIntentClassification (da)": 41.76, - "MassiveIntentClassification (de)": 52.01, - "MassiveIntentClassification (el)": 9.74, - "MassiveIntentClassification (en)": 71.78, - "MassiveIntentClassification (es)": 54.1, - "MassiveIntentClassification (fa)": 3.86, - "MassiveIntentClassification (fi)": 34.07, - "MassiveIntentClassification (fr)": 57.01, - "MassiveIntentClassification (he)": 2.14, - "MassiveIntentClassification (hi)": 2.97, - "MassiveIntentClassification (hu)": 32.01, - "MassiveIntentClassification (hy)": 3.17, - "MassiveIntentClassification (id)": 34.55, - "MassiveIntentClassification (is)": 32.0, - "MassiveIntentClassification (it)": 52.94, - "MassiveIntentClassification (ja)": 2.9, - "MassiveIntentClassification (jv)": 32.42, - "MassiveIntentClassification (ka)": 2.71, - "MassiveIntentClassification (km)": 5.5, - "MassiveIntentClassification (kn)": 2.41, - "MassiveIntentClassification (ko)": 2.57, - "MassiveIntentClassification (lv)": 35.09, - "MassiveIntentClassification (ml)": 2.95, - "MassiveIntentClassification (mn)": 18.33, - "MassiveIntentClassification (ms)": 29.69, - "MassiveIntentClassification (my)": 3.99, - "MassiveIntentClassification (nb)": 41.29, - "MassiveIntentClassification (nl)": 44.95, - "MassiveIntentClassification (pl)": 37.67, - "MassiveIntentClassification (pt)": 51.96, - "MassiveIntentClassification (ro)": 43.83, - "MassiveIntentClassification (ru)": 17.32, - "MassiveIntentClassification (sl)": 33.71, - "MassiveIntentClassification (sq)": 37.62, - "MassiveIntentClassification (sv)": 40.67, - "MassiveIntentClassification (sw)": 31.9, - "MassiveIntentClassification (ta)": 1.91, - "MassiveIntentClassification (te)": 2.54, - "MassiveIntentClassification (th)": 3.85, - "MassiveIntentClassification (tl)": 36.83, - "MassiveIntentClassification (tr)": 33.0, - "MassiveIntentClassification (ur)": 2.62, - "MassiveIntentClassification (vi)": 22.81, - "MassiveIntentClassification (zh-CN)": 1.09, - "MassiveIntentClassification (zh-TW)": 3.49, - "MassiveScenarioClassification (af)": 50.28, - "MassiveScenarioClassification (am)": 7.15, - "MassiveScenarioClassification (ar)": 12.12, - "MassiveScenarioClassification (az)": 39.68, - "MassiveScenarioClassification (bn)": 8.06, - "MassiveScenarioClassification (cy)": 38.01, - "MassiveScenarioClassification (da)": 51.44, - "MassiveScenarioClassification (de)": 62.71, - "MassiveScenarioClassification (el)": 17.19, - "MassiveScenarioClassification (en)": 73.16, - "MassiveScenarioClassification (es)": 59.56, - "MassiveScenarioClassification (fa)": 6.5, - "MassiveScenarioClassification (fi)": 41.72, - "MassiveScenarioClassification (fr)": 63.6, - "MassiveScenarioClassification (he)": 7.93, - "MassiveScenarioClassification (hi)": 7.85, - "MassiveScenarioClassification (hu)": 41.37, - "MassiveScenarioClassification (hy)": 9.42, - "MassiveScenarioClassification (id)": 44.88, - "MassiveScenarioClassification (is)": 40.86, - "MassiveScenarioClassification (it)": 60.09, - "MassiveScenarioClassification (ja)": 6.56, - "MassiveScenarioClassification (jv)": 40.18, - "MassiveScenarioClassification (ka)": 7.37, - "MassiveScenarioClassification (km)": 9.56, - "MassiveScenarioClassification (kn)": 8.4, - "MassiveScenarioClassification (ko)": 5.96, - "MassiveScenarioClassification (lv)": 41.44, - "MassiveScenarioClassification (ml)": 7.47, - "MassiveScenarioClassification (mn)": 25.36, - "MassiveScenarioClassification (ms)": 39.69, - "MassiveScenarioClassification (my)": 9.68, - "MassiveScenarioClassification (nb)": 49.92, - "MassiveScenarioClassification (nl)": 56.09, - "MassiveScenarioClassification (pl)": 45.2, - "MassiveScenarioClassification (pt)": 57.99, - "MassiveScenarioClassification (ro)": 56.0, - "MassiveScenarioClassification (ru)": 27.47, - "MassiveScenarioClassification (sl)": 41.04, - "MassiveScenarioClassification (sq)": 49.38, - "MassiveScenarioClassification (sv)": 50.97, - "MassiveScenarioClassification (sw)": 40.62, - "MassiveScenarioClassification (ta)": 7.59, - "MassiveScenarioClassification (te)": 7.07, - "MassiveScenarioClassification (th)": 8.52, - "MassiveScenarioClassification (tl)": 49.89, - "MassiveScenarioClassification (tr)": 43.08, - "MassiveScenarioClassification (ur)": 9.31, - "MassiveScenarioClassification (vi)": 27.46, - "MassiveScenarioClassification (zh-CN)": 4.7, - "MassiveScenarioClassification (zh-TW)": 7.24, - "ToxicConversationsClassification": 71.73, - "TweetSentimentExtractionClassification": 62.33 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "sentence-t5-large", - "AlloProfClusteringP2P": 61.82, - "AlloProfClusteringS2S": 39.78, - "ArxivClusteringP2P": 41.62, - "ArxivClusteringS2S": 29.44, - "BiorxivClusteringP2P": 35.99, - "BiorxivClusteringS2S": 24.02, - "BlurbsClusteringP2P": 35.33, - "BlurbsClusteringS2S": 13.27, - "HALClusteringS2S": 18.73, - "MLSUMClusteringP2P": 42.07, - "MLSUMClusteringS2S": 31.87, - "MasakhaNEWSClusteringP2P (fra)": 58.6, - "MasakhaNEWSClusteringS2S (fra)": 31.33, - "MedrxivClusteringP2P": 32.4, - "MedrxivClusteringS2S": 26.33, - "RedditClustering": 54.53, - "RedditClusteringP2P": 62.5, - "StackExchangeClustering": 65.11, - "StackExchangeClusteringP2P": 36.86, - "TenKGnadClusteringP2P": 44.11, - "TenKGnadClusteringS2S": 17.26, - "TwentyNewsgroupsClustering": 49.33 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "sentence-t5-large", - "OpusparcusPC (fr)": 91.19, - "PawsXPairClassification (fr)": 59.59, - "SprintDuplicateQuestions": 89.01, - "TwitterSemEval2015": 79.75, - "TwitterURLCorpus": 86.14 - }, - { - "Model": "sentence-t5-large", - "OpusparcusPC (fr)": 91.19, - "PawsXPairClassification (fr)": 59.69, - "SprintDuplicateQuestions": 89.02, - "TwitterSemEval2015": 79.75, - "TwitterURLCorpus": 86.14 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "sentence-t5-large", - "AlloprofReranking": 57.99, - "AskUbuntuDupQuestions": 61.51, - "MindSmallReranking": 30.27, - "SciDocsRR": 74.88, - "StackOverflowDupQuestions": 49.34, - "SyntecReranking": 79.77 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-t5-large", - "AlloprofRetrieval": 34.52, - "ArguAna": 39.27, - "BSARDRetrieval": 0.0, - "CQADupstackRetrieval": 38.96, - "ClimateFEVER": 11.36, - "DBPedia": 31.55, - "FEVER": 36.21, - "FiQA2018": 43.55, - "HotpotQA": 33.95, - "MSMARCO": 23.96, - "MintakaRetrieval (fr)": 23.92, - "NFCorpus": 31.1, - "NQ": 42.02, - "QuoraRetrieval": 85.73, - "SCIDOCS": 15.38, - "SciFact": 49.91, - "SyntecRetrieval": 71.05, - "TRECCOVID": 46.11, - "Touche2020": 21.63, - "XPQARetrieval (fr)": 48.79 + "Model": "instructor-large", + "BrightRetrieval (pony)": 1.32, + "BrightRetrieval (sustainable_living)": 13.16, + "BrightRetrieval (aops)": 7.94, + "BrightRetrieval (biology)": 15.61, + "BrightRetrieval (stackoverflow)": 11.21, + "BrightRetrieval (theoremqa_theorems)": 8.27, + "BrightRetrieval (psychology)": 21.94, + "BrightRetrieval (economics)": 15.99, + "BrightRetrieval (robotics)": 11.45, + "BrightRetrieval (leetcode)": 20.0, + "BrightRetrieval (earth_science)": 21.52, + "BrightRetrieval (theoremqa_questions)": 20.07 } - ] - }, - "STS": { - "cosine_spearman": [ + ], + "recall_at_1": [ { - "Model": "sentence-t5-large", - "BIOSSES": 78.93, - "SICK-R": 80.34, - "SICKFr": 72.83, - "STS12": 79.11, - "STS13": 87.33, - "STS14": 83.17, - "STS15": 88.28, - "STS16": 84.36, - "STS17 (ar-ar)": 10.75, - "STS17 (en-ar)": -4.71, - "STS17 (en-de)": 73.62, - "STS17 (en-en)": 88.99, - "STS17 (en-tr)": -0.42, - "STS17 (es-en)": 62.62, - "STS17 (es-es)": 82.74, - "STS17 (fr-en)": 67.86, - "STS17 (it-en)": 51.86, - "STS17 (ko-ko)": 9.44, - "STS17 (nl-en)": 45.95, - "STS22 (ar)": 27.01, - "STS22 (de)": 43.73, - "STS22 (de-en)": 49.93, - "STS22 (de-fr)": 61.58, - "STS22 (de-pl)": 38.83, - "STS22 (en)": 62.39, - "STS22 (es)": 57.68, - "STS22 (es-en)": 68.09, - "STS22 (es-it)": 61.58, - "STS22 (fr)": 75.01, - "STS22 (fr-pl)": 5.63, - "STS22 (it)": 62.01, - "STS22 (pl)": 25.0, - "STS22 (pl-en)": 51.72, - "STS22 (ru)": 14.21, - "STS22 (tr)": 47.3, - "STS22 (zh)": 30.47, - "STS22 (zh-en)": 23.1, - "STSBenchmark": 85.36, - "STSBenchmarkMultilingualSTS (fr)": 77.59 + "Model": "instructor-large", + "BrightRetrieval (stackoverflow)": 14.53, + "BrightRetrieval (pony)": 3.94, + "BrightRetrieval (economics)": 14.08, + "BrightRetrieval (earth_science)": 29.45, + "BrightRetrieval (sustainable_living)": 25.42, + "BrightRetrieval (psychology)": 21.29, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (biology)": 24.11 } ] }, + "STS": { + "cosine_spearman": [] + }, "Summarization": { - "cosine_spearman": [ - { - "Model": "sentence-t5-large", - "SummEval": 29.64, - "SummEvalFr": 30.23 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -12952,36 +11623,12 @@ "p-MRR": [] } }, - "norbert3-base": { + "instructor-xl": { "BitextMining": { - "f1": [ - { - "Model": "norbert3-base", - "BornholmBitextMining": 6.08 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "norbert3-base", - "AngryTweetsClassification": 52.48, - "DKHateClassification": 58.78, - "DanishPoliticalCommentsClassification": 34.14, - "LccSentimentClassification": 54.07, - "MassiveIntentClassification (da)": 53.16, - "MassiveIntentClassification (nb)": 54.2, - "MassiveIntentClassification (sv)": 52.08, - "MassiveScenarioClassification (da)": 57.17, - "MassiveScenarioClassification (nb)": 60.69, - "MassiveScenarioClassification (sv)": 53.53, - "NoRecClassification": 53.4, - "NordicLangClassification": 82.67, - "NorwegianParliament": 59.33, - "ScalaDaClassification": 58.25, - "ScalaNbClassification": 60.19 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [] @@ -12993,7 +11640,36 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "instructor-xl", + "BrightRetrieval (aops)": 8.26, + "BrightRetrieval (robotics)": 17.39, + "BrightRetrieval (economics)": 22.81, + "BrightRetrieval (stackoverflow)": 19.06, + "BrightRetrieval (leetcode)": 27.5, + "BrightRetrieval (theoremqa_questions)": 14.59, + "BrightRetrieval (psychology)": 27.43, + "BrightRetrieval (biology)": 21.91, + "BrightRetrieval (theoremqa_theorems)": 6.22, + "BrightRetrieval (earth_science)": 34.35, + "BrightRetrieval (sustainable_living)": 18.82, + "BrightRetrieval (pony)": 5.02 + } + ], + "recall_at_1": [ + { + "Model": "instructor-xl", + "BrightRetrieval (stackoverflow)": 14.96, + "BrightRetrieval (biology)": 22.01, + "BrightRetrieval (sustainable_living)": 20.14, + "BrightRetrieval (pony)": 5.93, + "BrightRetrieval (psychology)": 20.5, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (economics)": 14.08, + "BrightRetrieval (earth_science)": 32.04 + } + ] }, "STS": { "cosine_spearman": [] @@ -13005,190 +11681,178 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] - } - }, - "rubert-base-cased": { - "BitextMining": { - "f1": [ + "p-MRR": [ { - "Model": "rubert-base-cased", - "Tatoeba (rus-Cyrl_eng-Latn)": 16.76 + "Model": "instructor-xl", + "Core17InstructionRetrieval": 0.69, + "News21InstructionRetrieval": -0.9, + "Robust04InstructionRetrieval": -8.08 } ] + } + }, + "jina-embeddings-v2-base-en": { + "BitextMining": { + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "rubert-base-cased", - "GeoreviewClassification (rus-Cyrl)": 37.22, - "HeadlineClassification (rus-Cyrl)": 75.23, - "InappropriatenessClassification (rus-Cyrl)": 57.34, - "KinopoiskClassification (rus-Cyrl)": 49.91, - "MassiveIntentClassification (rus-Cyrl)": 53.02, - "MassiveScenarioClassification (rus-Cyrl)": 56.79, - "RuReviewsClassification (rus-Cyrl)": 50.74, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 48.03, - "RuSciBenchOECDClassification (rus-Cyrl)": 36.13 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "rubert-base-cased", - "GeoreviewClusteringP2P (rus-Cyrl)": 28.77, - "MLSUMClusteringP2P (rus-Cyrl)": 41.42, - "MLSUMClusteringS2S (rus-Cyrl)": 40.52, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 28.29, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 26.67 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "rubert-base-cased", - "OpusparcusPC (rus-Cyrl)": 81.65, - "TERRa (rus-Cyrl)": 52.12 - }, - { - "Model": "rubert-base-cased", - "OpusparcusPC (rus-Cyrl)": 81.65, - "TERRa (rus-Cyrl)": 53.17 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "rubert-base-cased", - "MIRACLReranking (rus-Cyrl)": 13.27 - }, - { - "Model": "rubert-base-cased", - "RuBQReranking (rus-Cyrl)": 41.65 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "rubert-base-cased", - "MIRACLRetrieval (rus-Cyrl)": 0.88, - "RiaNewsRetrieval (rus-Cyrl)": 5.58, - "RuBQRetrieval (rus-Cyrl)": 9.52 + "Model": "jina-embeddings-v2-base-en", + "LEMBNarrativeQARetrieval": 37.89, + "LEMBQMSumRetrieval": 38.87, + "LEMBSummScreenFDRetrieval": 93.48, + "LEMBWikimQARetrieval": 73.99 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "rubert-base-cased", - "RUParaPhraserSTS (rus-Cyrl)": 49.72, - "RuSTSBenchmarkSTS (rus-Cyrl)": 53.95, - "STS22 (rus-Cyrl)": 34.98, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 53.76 - } - ] + "cosine_spearman": [] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "rubert-base-cased", - "CEDRClassification (rus-Cyrl)": 33.59, - "SensitiveTopicsClassification (rus-Cyrl)": 18.8 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] } }, - "text2vec-base-multilingual": { + "komninos": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "komninos", + "BUCC": 0.05, + "Tatoeba": 7.27 + } + ] }, "Classification": { "accuracy": [ { - "Model": "text2vec-base-multilingual", - "AmazonReviewsClassification (fr)": 34.25, - "MTOPDomainClassification (fr)": 71.83, - "MTOPIntentClassification (fr)": 44.53, - "MasakhaNEWSClassification (fra)": 73.84, - "MassiveIntentClassification (fr)": 51.93, - "MassiveScenarioClassification (fr)": 58.31 + "Model": "komninos", + "AmazonCounterfactualClassification": 60.54, + "AmazonPolarityClassification": 59.59, + "AmazonReviewsClassification": 31.01, + "Banking77Classification": 67.05, + "EmotionClassification": 33.18, + "ImdbClassification": 63.98, + "MTOPDomainClassification": 78.57, + "MTOPIntentClassification": 57.07, + "MassiveIntentClassification": 57.21, + "MassiveScenarioClassification": 66.11, + "ToxicConversationsClassification": 67.76, + "TweetSentimentExtractionClassification": 49.68 } ] }, "Clustering": { "v_measure": [ { - "Model": "text2vec-base-multilingual", - "AlloProfClusteringP2P": 49.11, - "AlloProfClusteringS2S": 32.72, - "HALClusteringS2S": 16.19, - "MLSUMClusteringP2P": 36.19, - "MLSUMClusteringS2S": 30.39, - "MasakhaNEWSClusteringP2P (fra)": 38.51, - "MasakhaNEWSClusteringS2S (fra)": 32.51 + "Model": "komninos", + "ArxivClusteringP2P": 34.73, + "ArxivClusteringS2S": 26.01, + "BiorxivClusteringP2P": 29.76, + "BiorxivClusteringS2S": 20.71, + "BlurbsClusteringP2P": 11.37, + "BlurbsClusteringS2S": 8.01, + "MedrxivClusteringP2P": 26.65, + "MedrxivClusteringS2S": 21.5, + "RedditClustering": 28.84, + "RedditClusteringP2P": 7.37, + "StackExchangeClustering": 39.04, + "StackExchangeClusteringP2P": 30.23, + "TenKGnadClusteringP2P": 15.89, + "TenKGnadClusteringS2S": 4.84, + "TwentyNewsgroupsClustering": 27.42 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text2vec-base-multilingual", - "OpusparcusPC (fr)": 92.04, - "PawsXPairClassification (fr)": 65.57 + "Model": "komninos", + "SprintDuplicateQuestions": 85.55, + "TwitterSemEval2015": 53.85, + "TwitterURLCorpus": 79.41 }, { - "Model": "text2vec-base-multilingual", - "OpusparcusPC (fr)": 92.04, - "PawsXPairClassification (fr)": 65.6 + "Model": "komninos", + "SprintDuplicateQuestions": 85.55, + "TwitterSemEval2015": 54.02, + "TwitterURLCorpus": 79.41 } ] }, "Reranking": { "map": [ { - "Model": "text2vec-base-multilingual", - "AlloprofReranking": 51.48, - "SyntecReranking": 70.28 + "Model": "komninos", + "AskUbuntuDupQuestions": 50.88, + "MindSmallReranking": 28.92, + "SciDocsRR": 63.55, + "StackOverflowDupQuestions": 35.65 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text2vec-base-multilingual", - "AlloprofRetrieval": 18.9, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 14.81, - "SyntecRetrieval": 49.69, - "XPQARetrieval (fr)": 40.4 + "Model": "komninos", + "ArguAna": 30.96, + "CQADupstackRetrieval": 16.79, + "ClimateFEVER": 14.87, + "DBPedia": 15.88, + "FEVER": 15.56, + "FiQA2018": 10.49, + "HotpotQA": 20.77, + "MSMARCO": 9.75, + "NFCorpus": 11.79, + "NQ": 12.75, + "QuoraRetrieval": 71.57, + "SCIDOCS": 8.47, + "SciFact": 29.53, + "TRECCOVID": 35.92, + "Touche2020": 13.17 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text2vec-base-multilingual", - "SICKFr": 77.25, - "STS22 (fr)": 74.1, - "STSBenchmarkMultilingualSTS (fr)": 83.48 + "Model": "komninos", + "BIOSSES": 50.25, + "SICK-R": 55.49, + "STS12": 53.51, + "STS13": 70.8, + "STS14": 63.56, + "STS15": 74.08, + "STS16": 64.6, + "STS17": 0.44, + "STS22": 14.05, + "STSBenchmark": 61.55 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "text2vec-base-multilingual", - "SummEvalFr": 29.33 + "Model": "komninos", + "SummEval": 30.49 } ] }, @@ -13199,6 +11863,45 @@ "p-MRR": [] } }, + "llama-2-7b-chat": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "llama-2-7b-chat", + "Core17InstructionRetrieval": 2.84, + "News21InstructionRetrieval": 0.23, + "Robust04InstructionRetrieval": 2.0 + } + ] + } + }, "luotuo-bert-medium": { "BitextMining": { "f1": [] @@ -13207,11 +11910,11 @@ "accuracy": [ { "Model": "luotuo-bert-medium", - "AmazonReviewsClassification (zh)": 34.46, + "AmazonReviewsClassification": 34.46, "IFlyTek": 41.75, "JDReview": 79.68, - "MassiveIntentClassification (zh-CN)": 57.47, - "MassiveScenarioClassification (zh-CN)": 65.32, + "MassiveIntentClassification": 57.47, + "MassiveScenarioClassification": 65.32, "MultilingualSentiment": 61.21, "OnlineShopping": 84.3, "TNews": 45.22, @@ -13280,7 +11983,7 @@ "LCQMC": 66.74, "PAWSX": 12.31, "QBQTC": 27.2, - "STS22 (zh)": 66.4, + "STS22": 66.4, "STSB": 73.22 } ] @@ -13295,979 +11998,185 @@ "p-MRR": [] } }, - "e5-base": { - "BitextMining": { - "f1": [ - { - "Model": "e5-base", - "BornholmBitextMining": 40.09 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "e5-base", - "AngryTweetsClassification": 45.06, - "DKHateClassification": 58.51, - "DanishPoliticalCommentsClassification": 28.43, - "LccSentimentClassification": 37.47, - "MassiveIntentClassification (da)": 44.25, - "MassiveIntentClassification (nb)": 41.57, - "MassiveIntentClassification (sv)": 41.34, - "MassiveScenarioClassification (da)": 52.99, - "MassiveScenarioClassification (nb)": 50.33, - "MassiveScenarioClassification (sv)": 50.0, - "NoRecClassification": 42.0, - "NordicLangClassification": 59.34, - "NorwegianParliament": 57.42, - "ScalaDaClassification": 50.08, - "ScalaNbClassification": 50.18 - } - ] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "e5-base", - "LEMBNarrativeQARetrieval": 25.31, - "LEMBQMSumRetrieval": 23.83, - "LEMBSummScreenFDRetrieval": 74.67, - "LEMBWikimQARetrieval": 55.85 - }, - { - "Model": "e5-base", - "LEMBNeedleRetrieval": 28.5, - "LEMBPasskeyRetrieval": 33.25 - } - ] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "multilingual-e5-small": { - "BitextMining": { - "f1": [ - { - "Model": "multilingual-e5-small", - "BornholmBitextMining (dan-Latn)": 37.15, - "BornholmBitextMining": 43.89, - "Tatoeba (dan-Latn_eng-Latn)": 86.38, - "Tatoeba (aze-Latn_eng-Latn)": 80.79, - "Tatoeba (tur-Latn_eng-Latn)": 88.42, - "Tatoeba (ces-Latn_eng-Latn)": 80.99, - "Tatoeba (swh-Latn_eng-Latn)": 65.43, - "Tatoeba (hrv-Latn_eng-Latn)": 84.42, - "Tatoeba (est-Latn_eng-Latn)": 56.47, - "Tatoeba (bel-Cyrl_eng-Latn)": 80.89, - "Tatoeba (mhr-Cyrl_eng-Latn)": 5.58, - "Tatoeba (cat-Latn_eng-Latn)": 79.3, - "Tatoeba (ben-Beng_eng-Latn)": 81.4, - "Tatoeba (tgl-Latn_eng-Latn)": 77.54, - "Tatoeba (cym-Latn_eng-Latn)": 62.3, - "Tatoeba (tha-Thai_eng-Latn)": 90.88, - "Tatoeba (swe-Latn_eng-Latn)": 87.46, - "Tatoeba (oci-Latn_eng-Latn)": 38.27, - "Tatoeba (gsw-Latn_eng-Latn)": 40.13, - "Tatoeba (ceb-Latn_eng-Latn)": 42.35, - "Tatoeba (ind-Latn_eng-Latn)": 88.28, - "Tatoeba (ron-Latn_eng-Latn)": 85.68, - "Tatoeba (gla-Latn_eng-Latn)": 35.96, - "Tatoeba (ile-Latn_eng-Latn)": 70.31, - "Tatoeba (orv-Cyrl_eng-Latn)": 14.89, - "Tatoeba (vie-Latn_eng-Latn)": 89.03, - "Tatoeba (slv-Latn_eng-Latn)": 73.93, - "Tatoeba (tel-Telu_eng-Latn)": 86.82, - "Tatoeba (cmn-Hans_eng-Latn)": 89.85, - "Tatoeba (fao-Latn_eng-Latn)": 56.57, - "Tatoeba (glg-Latn_eng-Latn)": 79.65, - "Tatoeba (tuk-Latn_eng-Latn)": 16.99, - "Tatoeba (war-Latn_eng-Latn)": 39.14, - "Tatoeba (ita-Latn_eng-Latn)": 88.54, - "Tatoeba (epo-Latn_eng-Latn)": 88.96, - "Tatoeba (fra-Latn_eng-Latn)": 90.51, - "Tatoeba (pol-Latn_eng-Latn)": 88.85, - "Tatoeba (khm-Khmr_eng-Latn)": 44.34, - "Tatoeba (fin-Latn_eng-Latn)": 70.23, - "Tatoeba (zsm-Latn_eng-Latn)": 91.37, - "Tatoeba (bre-Latn_eng-Latn)": 7.09, - "Tatoeba (kur-Latn_eng-Latn)": 39.99, - "Tatoeba (yid-Hebr_eng-Latn)": 65.9, - "Tatoeba (kaz-Cyrl_eng-Latn)": 70.57, - "Tatoeba (cbk-Latn_eng-Latn)": 55.36, - "Tatoeba (mkd-Cyrl_eng-Latn)": 63.74, - "Tatoeba (hsb-Latn_eng-Latn)": 36.49, - "Tatoeba (deu-Latn_eng-Latn)": 97.22, - "Tatoeba (isl-Latn_eng-Latn)": 62.32, - "Tatoeba (kzj-Latn_eng-Latn)": 6.56, - "Tatoeba (lat-Latn_eng-Latn)": 37.76, - "Tatoeba (rus-Cyrl_eng-Latn)": 89.77, - "Tatoeba (ber-Tfng_eng-Latn)": 18.22, - "Tatoeba (nld-Latn_eng-Latn)": 91.87, - "Tatoeba (lit-Latn_eng-Latn)": 59.95, - "Tatoeba (uig-Arab_eng-Latn)": 60.59, - "Tatoeba (kab-Latn_eng-Latn)": 18.06, - "Tatoeba (nov-Latn_eng-Latn)": 64.2, - "Tatoeba (heb-Hebr_eng-Latn)": 73.68, - "Tatoeba (eus-Latn_eng-Latn)": 50.9, - "Tatoeba (kat-Geor_eng-Latn)": 77.6, - "Tatoeba (xho-Latn_eng-Latn)": 63.2, - "Tatoeba (yue-Hant_eng-Latn)": 69.33, - "Tatoeba (ina-Latn_eng-Latn)": 86.39, - "Tatoeba (bos-Latn_eng-Latn)": 81.15, - "Tatoeba (lfn-Latn_eng-Latn)": 51.46, - "Tatoeba (nno-Latn_eng-Latn)": 70.29, - "Tatoeba (urd-Arab_eng-Latn)": 85.07, - "Tatoeba (arq-Arab_eng-Latn)": 23.62, - "Tatoeba (tam-Taml_eng-Latn)": 82.82, - "Tatoeba (ang-Latn_eng-Latn)": 30.3, - "Tatoeba (swg-Latn_eng-Latn)": 44.0, - "Tatoeba (csb-Latn_eng-Latn)": 26.23, - "Tatoeba (wuu-Hans_eng-Latn)": 67.3, - "Tatoeba (max-Deva_eng-Latn)": 48.29, - "Tatoeba (tzl-Latn_eng-Latn)": 34.83, - "Tatoeba (uzb-Latn_eng-Latn)": 59.11, - "Tatoeba (amh-Ethi_eng-Latn)": 74.11, - "Tatoeba (fry-Latn_eng-Latn)": 49.05, - "Tatoeba (mar-Deva_eng-Latn)": 85.94, - "Tatoeba (pes-Arab_eng-Latn)": 85.51, - "Tatoeba (por-Latn_eng-Latn)": 89.63, - "Tatoeba (hin-Deva_eng-Latn)": 92.36, - "Tatoeba (tat-Cyrl_eng-Latn)": 66.8, - "Tatoeba (sqi-Latn_eng-Latn)": 86.21, - "Tatoeba (cor-Latn_eng-Latn)": 5.24, - "Tatoeba (slk-Latn_eng-Latn)": 79.86, - "Tatoeba (dtp-Latn_eng-Latn)": 6.42, - "Tatoeba (jpn-Jpan_eng-Latn)": 77.43, - "Tatoeba (ell-Grek_eng-Latn)": 86.81, - "Tatoeba (lvs-Latn_eng-Latn)": 61.84, - "Tatoeba (cha-Latn_eng-Latn)": 24.88, - "Tatoeba (arz-Arab_eng-Latn)": 53.35, - "Tatoeba (dsb-Latn_eng-Latn)": 29.87, - "Tatoeba (kor-Hang_eng-Latn)": 73.74, - "Tatoeba (srp-Cyrl_eng-Latn)": 83.06, - "Tatoeba (mal-Mlym_eng-Latn)": 94.78, - "Tatoeba (hye-Armn_eng-Latn)": 83.81, - "Tatoeba (spa-Latn_eng-Latn)": 93.01, - "Tatoeba (ara-Arab_eng-Latn)": 76.09, - "Tatoeba (bul-Cyrl_eng-Latn)": 85.47, - "Tatoeba (jav-Latn_eng-Latn)": 53.39, - "Tatoeba (ukr-Cyrl_eng-Latn)": 82.98, - "Tatoeba (awa-Deva_eng-Latn)": 74.55, - "Tatoeba (nob-Latn_eng-Latn)": 90.18, - "Tatoeba (ido-Latn_eng-Latn)": 70.07, - "Tatoeba (hun-Latn_eng-Latn)": 74.44, - "Tatoeba (nds-Latn_eng-Latn)": 52.46, - "Tatoeba (pam-Latn_eng-Latn)": 5.76, - "Tatoeba (ast-Latn_eng-Latn)": 62.81, - "Tatoeba (pms-Latn_eng-Latn)": 35.47, - "Tatoeba (afr-Latn_eng-Latn)": 85.17, - "Tatoeba (gle-Latn_eng-Latn)": 56.32, - "Tatoeba (mon-Cyrl_eng-Latn)": 77.7 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "multilingual-e5-small", - "AllegroReviews (pol-Latn)": 37.33, - "AllegroReviews": 37.42, - "AmazonCounterfactualClassification (en-ext)": 73.07, - "AmazonCounterfactualClassification (en)": 71.87, - "AmazonCounterfactualClassification (deu-Latn)": 71.72, - "AmazonCounterfactualClassification (jpn-Jpan)": 61.46, - "AmazonPolarityClassification": 88.61, - "AmazonReviewsClassification (en)": 45.75, - "AmazonReviewsClassification (deu-Latn)": 41.07, - "AmazonReviewsClassification (spa-Latn)": 41.37, - "AmazonReviewsClassification (fra-Latn)": 39.47, - "AmazonReviewsClassification (jpn-Jpan)": 38.55, - "AmazonReviewsClassification (cmn-Hans)": 38.31, - "AmazonReviewsClassification (fr)": 39.68, - "AngryTweetsClassification (dan-Latn)": 56.27, - "AngryTweetsClassification": 53.57, - "Banking77Classification": 70.44, - "CBD (pol-Latn)": 63.33, - "CBD": 63.25, - "DKHateClassification": 60.73, - "DanishPoliticalCommentsClassification (dan-Latn)": 34.82, - "DanishPoliticalCommentsClassification": 34.38, - "EmotionClassification": 42.86, - "GeoreviewClassification (rus-Cyrl)": 44.66, - "HeadlineClassification (rus-Cyrl)": 73.94, - "IFlyTek (cmn-Hans)": 40.74, - "IFlyTek": 47.35, - "ImdbClassification": 79.57, - "InappropriatenessClassification (rus-Cyrl)": 59.16, - "JDReview (cmn-Hans)": 78.37, - "JDReview": 79.34, - "KinopoiskClassification (rus-Cyrl)": 49.96, - "LccSentimentClassification (dan-Latn)": 58.6, - "LccSentimentClassification": 57.87, - "MTOPDomainClassification (en)": 88.99, - "MTOPDomainClassification (deu-Latn)": 86.15, - "MTOPDomainClassification (spa-Latn)": 85.53, - "MTOPDomainClassification (fra-Latn)": 81.5, - "MTOPDomainClassification (hin-Deva)": 84.07, - "MTOPDomainClassification (tha-Thai)": 83.16, - "MTOPDomainClassification (fr)": 81.2, - "MTOPIntentClassification (en)": 56.69, - "MTOPIntentClassification (deu-Latn)": 55.88, - "MTOPIntentClassification (spa-Latn)": 53.15, - "MTOPIntentClassification (fra-Latn)": 44.35, - "MTOPIntentClassification (hin-Deva)": 52.26, - "MTOPIntentClassification (tha-Thai)": 54.61, - "MTOPIntentClassification (fr)": 46.01, - "MasakhaNEWSClassification (amh-Ethi)": 84.28, - "MasakhaNEWSClassification (eng)": 75.61, - "MasakhaNEWSClassification (fra-Latn)": 74.67, - "MasakhaNEWSClassification (hau-Latn)": 73.08, - "MasakhaNEWSClassification (ibo-Latn)": 63.9, - "MasakhaNEWSClassification (lin-Latn)": 73.37, - "MasakhaNEWSClassification (lug-Latn)": 67.89, - "MasakhaNEWSClassification (orm-Ethi)": 68.77, - "MasakhaNEWSClassification (pcm-Latn)": 90.79, - "MasakhaNEWSClassification (run-Latn)": 75.4, - "MasakhaNEWSClassification (sna-Latn)": 82.76, - "MasakhaNEWSClassification (som-Latn)": 59.8, - "MasakhaNEWSClassification (swa-Latn)": 69.85, - "MasakhaNEWSClassification (tir-Ethi)": 68.01, - "MasakhaNEWSClassification (xho-Latn)": 72.22, - "MasakhaNEWSClassification (yor-Latn)": 73.84, - "MasakhaNEWSClassification (fra)": 77.65, - "MassiveIntentClassification (isl-Latn)": 41.53, - "MassiveIntentClassification (tgl-Latn)": 48.7, - "MassiveIntentClassification (heb-Hebr)": 51.11, - "MassiveIntentClassification (tam-Taml)": 47.65, - "MassiveIntentClassification (ara-Arab)": 47.78, - "MassiveIntentClassification (mya-Mymr)": 45.64, - "MassiveIntentClassification (slv-Latn)": 47.71, - "MassiveIntentClassification (afr-Latn)": 48.74, - "MassiveIntentClassification (deu-Latn)": 55.52, - "MassiveIntentClassification (pol-Latn)": 57.33, - "MassiveIntentClassification (en)": 63.87, - "MassiveIntentClassification (fin-Latn)": 55.14, - "MassiveIntentClassification (lav-Latn)": 44.93, - "MassiveIntentClassification (fra-Latn)": 57.9, - "MassiveIntentClassification (urd-Arab)": 50.51, - "MassiveIntentClassification (mon-Cyrl)": 47.38, - "MassiveIntentClassification (ita-Latn)": 58.8, - "MassiveIntentClassification (kor-Kore)": 57.12, - "MassiveIntentClassification (nob-Latn)": 55.36, - "MassiveIntentClassification (spa-Latn)": 59.19, - "MassiveIntentClassification (jpn-Jpan)": 61.58, - "MassiveIntentClassification (dan-Latn)": 56.12, - "MassiveIntentClassification (cmo-Hant)": 53.75, - "MassiveIntentClassification (ind-Latn)": 56.2, - "MassiveIntentClassification (kat-Geor)": 39.52, - "MassiveIntentClassification (hin-Deva)": 55.69, - "MassiveIntentClassification (cym-Latn)": 36.62, - "MassiveIntentClassification (kan-Knda)": 47.85, - "MassiveIntentClassification (por-Latn)": 60.12, - "MassiveIntentClassification (tha-Thai)": 56.26, - "MassiveIntentClassification (fas-Arab)": 57.73, - "MassiveIntentClassification (ben-Beng)": 50.68, - "MassiveIntentClassification (mal-Mlym)": 52.81, - "MassiveIntentClassification (ron-Latn)": 52.82, - "MassiveIntentClassification (amh-Ethi)": 43.52, - "MassiveIntentClassification (hun-Latn)": 53.21, - "MassiveIntentClassification (swa-Latn)": 44.84, - "MassiveIntentClassification (msa-Latn)": 50.8, - "MassiveIntentClassification (tur-Latn)": 56.88, - "MassiveIntentClassification (khm-Khmr)": 33.45, - "MassiveIntentClassification (rus-Cyrl)": 58.43, - "MassiveIntentClassification (aze-Latn)": 49.32, - "MassiveIntentClassification (tel-Telu)": 48.85, - "MassiveIntentClassification (nld-Latn)": 59.27, - "MassiveIntentClassification (cmo-Hans)": 62.04, - "MassiveIntentClassification (sqi-Latn)": 48.68, - "MassiveIntentClassification (vie-Latn)": 56.19, - "MassiveIntentClassification (jav-Latn)": 42.96, - "MassiveIntentClassification (swe-Latn)": 58.2, - "MassiveIntentClassification (hye-Armn)": 47.89, - "MassiveIntentClassification (ell-Grek)": 54.14, - "MassiveIntentClassification (da)": 54.63, - "MassiveIntentClassification (nb)": 53.96, - "MassiveIntentClassification (sv)": 56.6, - "MassiveIntentClassification (pl)": 57.4, - "MassiveScenarioClassification (deu-Latn)": 65.88, - "MassiveScenarioClassification (nob-Latn)": 61.96, - "MassiveScenarioClassification (tha-Thai)": 65.72, - "MassiveScenarioClassification (kat-Geor)": 44.96, - "MassiveScenarioClassification (jav-Latn)": 51.39, - "MassiveScenarioClassification (swe-Latn)": 67.33, - "MassiveScenarioClassification (fra-Latn)": 63.9, - "MassiveScenarioClassification (tgl-Latn)": 55.3, - "MassiveScenarioClassification (hun-Latn)": 61.93, - "MassiveScenarioClassification (urd-Arab)": 55.91, - "MassiveScenarioClassification (msa-Latn)": 59.18, - "MassiveScenarioClassification (aze-Latn)": 53.27, - "MassiveScenarioClassification (afr-Latn)": 58.0, - "MassiveScenarioClassification (cmo-Hant)": 61.15, - "MassiveScenarioClassification (lav-Latn)": 51.0, - "MassiveScenarioClassification (khm-Khmr)": 39.01, - "MassiveScenarioClassification (ell-Grek)": 62.29, - "MassiveScenarioClassification (ben-Beng)": 57.38, - "MassiveScenarioClassification (dan-Latn)": 64.03, - "MassiveScenarioClassification (mal-Mlym)": 60.31, - "MassiveScenarioClassification (ron-Latn)": 60.0, - "MassiveScenarioClassification (rus-Cyrl)": 63.89, - "MassiveScenarioClassification (ita-Latn)": 64.03, - "MassiveScenarioClassification (amh-Ethi)": 50.53, - "MassiveScenarioClassification (isl-Latn)": 49.66, - "MassiveScenarioClassification (jpn-Jpan)": 67.75, - "MassiveScenarioClassification (cmo-Hans)": 68.96, - "MassiveScenarioClassification (ind-Latn)": 62.0, - "MassiveScenarioClassification (tur-Latn)": 62.14, - "MassiveScenarioClassification (fas-Arab)": 63.32, - "MassiveScenarioClassification (tam-Taml)": 52.74, - "MassiveScenarioClassification (kan-Knda)": 52.73, - "MassiveScenarioClassification (por-Latn)": 62.75, - "MassiveScenarioClassification (cym-Latn)": 44.63, - "MassiveScenarioClassification (mya-Mymr)": 51.07, - "MassiveScenarioClassification (spa-Latn)": 64.43, - "MassiveScenarioClassification (hin-Deva)": 62.22, - "MassiveScenarioClassification (tel-Telu)": 54.86, - "MassiveScenarioClassification (mon-Cyrl)": 52.41, - "MassiveScenarioClassification (kor-Kore)": 65.7, - "MassiveScenarioClassification (slv-Latn)": 54.05, - "MassiveScenarioClassification (swa-Latn)": 52.42, - "MassiveScenarioClassification (hye-Armn)": 52.93, - "MassiveScenarioClassification (nld-Latn)": 67.01, - "MassiveScenarioClassification (sqi-Latn)": 56.15, - "MassiveScenarioClassification (fin-Latn)": 61.89, - "MassiveScenarioClassification (en)": 69.28, - "MassiveScenarioClassification (vie-Latn)": 62.67, - "MassiveScenarioClassification (heb-Hebr)": 59.22, - "MassiveScenarioClassification (ara-Arab)": 54.56, - "MassiveScenarioClassification (pol-Latn)": 64.27, - "MassiveScenarioClassification (da)": 62.34, - "MassiveScenarioClassification (nb)": 59.9, - "MassiveScenarioClassification (sv)": 65.54, - "MassiveScenarioClassification (pl)": 64.25, - "MultilingualSentiment (cmn-Hans)": 66.0, - "MultilingualSentiment": 64.74, - "NoRecClassification (nob-Latn)": 50.08, - "NoRecClassification": 53.96, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 72.15, - "NordicLangClassification": 75.15, - "NorwegianParliament": 60.15, - "OnlineShopping (cmn-Hans)": 88.7, - "OnlineShopping": 88.73, - "PAC (pol-Latn)": 70.48, - "PAC": 70.55, - "PolEmo2.0-IN (pol-Latn)": 67.31, - "PolEmo2.0-IN": 67.35, - "PolEmo2.0-OUT (pol-Latn)": 39.17, - "PolEmo2.0-OUT": 39.13, - "RuReviewsClassification (rus-Cyrl)": 61.18, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 54.99, - "RuSciBenchOECDClassification (rus-Cyrl)": 41.72, - "ScalaDaClassification": 50.3, - "ScalaNbClassification": 50.06, - "TNews (cmn-Hans)": 46.6, - "TNews": 48.38, - "ToxicConversationsClassification": 63.59, - "TweetSentimentExtractionClassification": 62.79, - "Waimai (cmn-Hans)": 84.15, - "Waimai": 83.9 + "m3e-base": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [ + { + "Model": "m3e-base", + "AmazonReviewsClassification": 43.02, + "IFlyTek": 44.42, + "JDReview": 85.33, + "MassiveIntentClassification": 68.4, + "MassiveScenarioClassification": 74.6, + "MultilingualSentiment": 71.9, + "OnlineShopping": 87.77, + "TNews": 48.28, + "Waimai": 83.99 } ] }, "Clustering": { "v_measure": [ { - "Model": "multilingual-e5-small", - "8TagsClustering": 23.92, - "AlloProfClusteringP2P": 60.89, - "AlloProfClusteringS2S": 32.52, - "BiorxivClusteringP2P": 35.84, - "BiorxivClusteringS2S": 27.35, - "CLSClusteringP2P": 39.14, - "CLSClusteringS2S": 37.79, - "GeoreviewClusteringP2P (rus-Cyrl)": 58.57, - "HALClusteringS2S": 18.95, - "MLSUMClusteringP2P (rus-Cyrl)": 39.69, - "MLSUMClusteringP2P": 43.2, - "MLSUMClusteringS2S (rus-Cyrl)": 39.9, - "MLSUMClusteringS2S": 37.61, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 66.2, - "MasakhaNEWSClusteringP2P (eng)": 50.08, - "MasakhaNEWSClusteringP2P (fra-Latn)": 56.32, - "MasakhaNEWSClusteringP2P (hau-Latn)": 53.63, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 49.19, - "MasakhaNEWSClusteringP2P (lin-Latn)": 55.06, - "MasakhaNEWSClusteringP2P (lug-Latn)": 59.97, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 32.72, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 62.22, - "MasakhaNEWSClusteringP2P (run-Latn)": 57.52, - "MasakhaNEWSClusteringP2P (sna-Latn)": 45.11, - "MasakhaNEWSClusteringP2P (som-Latn)": 42.39, - "MasakhaNEWSClusteringP2P (swa-Latn)": 23.77, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 57.68, - "MasakhaNEWSClusteringP2P (xho-Latn)": 39.96, - "MasakhaNEWSClusteringP2P (yor-Latn)": 26.56, - "MasakhaNEWSClusteringP2P (fra)": 40.12, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 55.48, - "MasakhaNEWSClusteringS2S (eng)": 37.79, - "MasakhaNEWSClusteringS2S (fra-Latn)": 35.8, - "MasakhaNEWSClusteringS2S (hau-Latn)": 20.22, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 35.67, - "MasakhaNEWSClusteringS2S (lin-Latn)": 41.12, - "MasakhaNEWSClusteringS2S (lug-Latn)": 48.63, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 29.16, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 65.36, - "MasakhaNEWSClusteringS2S (run-Latn)": 45.5, - "MasakhaNEWSClusteringS2S (sna-Latn)": 47.61, - "MasakhaNEWSClusteringS2S (som-Latn)": 28.59, - "MasakhaNEWSClusteringS2S (swa-Latn)": 13.91, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 50.51, - "MasakhaNEWSClusteringS2S (xho-Latn)": 37.26, - "MasakhaNEWSClusteringS2S (yor-Latn)": 23.38, - "MasakhaNEWSClusteringS2S (fra)": 39.22, - "MedrxivClusteringP2P": 30.72, - "MedrxivClusteringS2S": 27.0, - "RedditClustering": 40.12, - "RedditClusteringP2P": 59.49, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 51.14, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.33, - "StackExchangeClustering": 53.32, - "StackExchangeClusteringP2P": 31.87, - "ThuNewsClusteringP2P": 55.18, - "ThuNewsClusteringS2S": 48.93, - "TwentyNewsgroupsClustering": 33.67 + "Model": "m3e-base", + "CLSClusteringP2P": 39.81, + "CLSClusteringS2S": 37.34, + "ThuNewsClusteringP2P": 59.77, + "ThuNewsClusteringS2S": 53.78 } ] }, "PairClassification": { "max_ap": [ { - "Model": "multilingual-e5-small", - "CDSC-E (pol-Latn)": 69.69, - "OpusparcusPC (deu-Latn)": 94.9, - "OpusparcusPC (en)": 98.42, - "OpusparcusPC (fin-Latn)": 88.29, - "OpusparcusPC (fra-Latn)": 91.77, - "OpusparcusPC (rus-Cyrl)": 84.79, - "OpusparcusPC (swe-Latn)": 91.07, - "PSC (pol-Latn)": 99.23, - "PawsXPairClassification (deu-Latn)": 52.13, - "PawsXPairClassification (en)": 53.91, - "PawsXPairClassification (spa-Latn)": 51.39, - "PawsXPairClassification (fra-Latn)": 52.69, - "PawsXPairClassification (jpn-Hira)": 48.24, - "PawsXPairClassification (kor-Hang)": 49.95, - "PawsXPairClassification (cmn-Hans)": 54.01, - "SICK-E-PL (pol-Latn)": 66.35, - "SprintDuplicateQuestions": 92.18, - "TERRa (rus-Cyrl)": 55.14, - "TwitterSemEval2015": 70.75, - "TwitterURLCorpus": 85.03 - }, - { - "Model": "multilingual-e5-small", - "CDSC-E (pol-Latn)": 69.85, - "CDSC-E": 69.84, - "Cmnli": 72.12, - "Ocnli": 60.77, - "OpusparcusPC (deu-Latn)": 94.9, - "OpusparcusPC (en)": 98.42, - "OpusparcusPC (fin-Latn)": 88.29, - "OpusparcusPC (fra-Latn)": 91.77, - "OpusparcusPC (rus-Cyrl)": 84.79, - "OpusparcusPC (swe-Latn)": 91.07, - "OpusparcusPC (fr)": 92.52, - "PPC": 86.79, - "PSC (pol-Latn)": 99.23, - "PSC": 99.24, - "PawsXPairClassification (deu-Latn)": 52.19, - "PawsXPairClassification (en)": 54.01, - "PawsXPairClassification (spa-Latn)": 51.46, - "PawsXPairClassification (fra-Latn)": 52.77, - "PawsXPairClassification (jpn-Hira)": 48.41, - "PawsXPairClassification (kor-Hang)": 49.98, - "PawsXPairClassification (cmn-Hans)": 54.03, - "PawsXPairClassification (fr)": 55.72, - "SICK-E-PL (pol-Latn)": 66.35, - "SICK-E-PL": 66.34, - "SprintDuplicateQuestions": 92.42, - "TERRa (rus-Cyrl)": 55.14, - "TwitterSemEval2015": 70.75, - "TwitterURLCorpus": 85.03 + "Model": "m3e-base", + "Cmnli": 69.98, + "Ocnli": 58.0 }, { - "Model": "multilingual-e5-small", - "CDSC-E": 69.7, - "Cmnli": 72.12, - "Ocnli": 60.77, - "OpusparcusPC (fr)": 92.52, - "PPC": 86.72, - "PSC": 99.24, - "PawsXPairClassification (fr)": 55.68, - "SICK-E-PL": 66.34 + "Model": "m3e-base", + "Cmnli": 70.0, + "Ocnli": 58.05 } ] }, "Reranking": { "map": [ { - "Model": "multilingual-e5-small", - "AlloprofReranking (fra-Latn)": 64.41, - "AlloprofReranking": 56.17, - "AskUbuntuDupQuestions": 56.42, - "CMedQAv1": 63.44, - "CMedQAv2": 62.41, - "MMarcoReranking (cmn-Hans)": 29.98, - "MMarcoReranking": 24.33, - "MindSmallReranking": 29.96, - "RuBQReranking (rus-Cyrl)": 69.98, - "SciDocsRR": 78.26, - "StackOverflowDupQuestions": 46.97, - "SyntecReranking (fra-Latn)": 81.22, - "SyntecReranking": 86.7, - "T2Reranking (cmn-Hans)": 65.72, - "T2Reranking": 65.24 - }, - { - "Model": "multilingual-e5-small", - "MIRACLReranking (rus-Cyrl)": 59.12 + "Model": "m3e-base", + "CMedQAv1": 77.05, + "CMedQAv2": 76.76, + "MMarcoReranking": 17.51, + "T2Reranking": 66.03 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "multilingual-e5-small", - "AILACasedocs": 23.43, - "AILAStatutes": 19.01, - "ARCChallenge": 7.14, - "AlloprofRetrieval (fra-Latn)": 27.38, - "AlloprofRetrieval": 27.01, - "AlphaNLI": 13.0, - "AppsRetrieval (eng-Latn_python-Code)": 12.01, - "ArguAna": 39.09, - "ArguAna-PL (pol-Latn)": 37.49, - "ArguAna-PL": 37.43, - "BSARDRetrieval (fra-Latn)": 14.54, - "BSARDRetrieval": 0.0, - "CmedqaRetrieval (cmn-Hans)": 24.36, - "CmedqaRetrieval": 24.38, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 41.32, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 72.67, - "CodeSearchNetCCRetrieval (python-Code)": 80.75, - "CodeSearchNetCCRetrieval (javascript-Code)": 77.6, - "CodeSearchNetCCRetrieval (go-Code)": 69.66, - "CodeSearchNetCCRetrieval (ruby-Code)": 72.75, - "CodeSearchNetCCRetrieval (java-Code)": 75.58, - "CodeSearchNetCCRetrieval (php-Code)": 69.38, - "CodeSearchNetRetrieval (python-Code)": 86.31, - "CodeSearchNetRetrieval (javascript-Code)": 69.53, - "CodeSearchNetRetrieval (go-Code)": 90.3, - "CodeSearchNetRetrieval (ruby-Code)": 76.58, - "CodeSearchNetRetrieval (java-Code)": 74.96, - "CodeSearchNetRetrieval (php-Code)": 80.28, - "CodeTransOceanContest (python-Code_c++-Code)": 69.76, - "CodeTransOceanDL": 32.3, - "CosQA (eng-Latn_python-Code)": 29.61, - "CovidRetrieval (cmn-Hans)": 72.82, - "CovidRetrieval": 72.82, - "DBPedia-PL": 29.27, - "DuRetrieval (cmn-Hans)": 81.36, - "DuRetrieval": 81.35, - "EcomRetrieval (cmn-Hans)": 53.53, - "EcomRetrieval": 53.56, - "FiQA-PL (pol-Latn)": 22.02, - "FiQA-PL": 22.03, - "FiQA2018": 33.13, - "GerDaLIRSmall (deu-Latn)": 14.81, - "HellaSwag": 23.73, - "HotpotQA-PL": 60.15, - "LEMBNarrativeQARetrieval": 22.6, - "LEMBQMSumRetrieval": 21.51, - "LEMBSummScreenFDRetrieval": 62.75, - "LEMBWikimQARetrieval": 57.13, - "LeCaRDv2 (zho-Hans)": 61.58, - "LegalBenchConsumerContractsQA": 66.98, - "LegalBenchCorporateLobbying": 89.47, - "LegalQuAD (deu-Latn)": 47.8, - "LegalSummarization": 55.76, - "MIRACLRetrieval (rus-Cyrl)": 59.01, - "MMarcoRetrieval (cmn-Hans)": 73.17, - "MMarcoRetrieval": 73.17, - "MSMARCO-PL": 26.94, - "MedicalRetrieval (cmn-Hans)": 44.84, - "MedicalRetrieval": 44.84, - "MintakaRetrieval (ara-Arab)": 21.22, - "MintakaRetrieval (deu-Latn)": 25.6, - "MintakaRetrieval (spa-Latn)": 26.4, - "MintakaRetrieval (fra-Latn)": 25.0, - "MintakaRetrieval (hin-Deva)": 21.1, - "MintakaRetrieval (ita-Latn)": 26.25, - "MintakaRetrieval (jpn-Hira)": 20.69, - "MintakaRetrieval (por-Latn)": 24.44, - "MintakaRetrieval (fr)": 22.53, - "NFCorpus": 31.0, - "NFCorpus-PL (pol-Latn)": 26.5, - "NFCorpus-PL": 26.48, - "NQ-PL": 40.46, - "PIQA": 21.08, - "Quail": 2.38, - "Quora-PL": 78.7, - "RARbCode": 46.96, - "RARbMath": 63.91, - "RiaNewsRetrieval (rus-Cyrl)": 70.0, - "RuBQRetrieval (rus-Cyrl)": 68.53, - "SCIDOCS": 13.9, - "SCIDOCS-PL (pol-Latn)": 11.59, - "SCIDOCS-PL": 11.6, - "SIQA": 2.57, - "SciFact": 67.7, - "SciFact-PL (pol-Latn)": 62.76, - "SciFact-PL": 62.76, - "SpartQA": 5.43, - "StackOverflowQA": 81.94, - "SyntecRetrieval (fra-Latn)": 73.46, - "SyntecRetrieval": 75.76, - "SyntheticText2SQL (eng-Latn_sql-Code)": 46.29, - "T2Retrieval (cmn-Hans)": 71.36, - "T2Retrieval": 71.39, - "TRECCOVID": 72.57, - "TRECCOVID-PL (pol-Latn)": 70.92, - "TRECCOVID-PL": 70.92, - "TempReasonL1": 0.8, - "TempReasonL2Fact": 36.76, - "TempReasonL2Pure": 0.62, - "TempReasonL3Fact": 32.42, - "TempReasonL3Pure": 6.36, - "Touche2020": 21.16, - "VideoRetrieval (cmn-Hans)": 58.06, - "VideoRetrieval": 58.09, - "WinoGrande": 37.46, - "XPQARetrieval (ara-Arab_ara-Arab)": 39.93, - "XPQARetrieval (eng-Latn_ara-Arab)": 18.09, - "XPQARetrieval (ara-Arab_eng-Latn)": 31.64, - "XPQARetrieval (deu-Latn_deu-Latn)": 69.43, - "XPQARetrieval (eng-Latn_deu-Latn)": 25.14, - "XPQARetrieval (deu-Latn_eng-Latn)": 52.36, - "XPQARetrieval (spa-Latn_spa-Latn)": 55.71, - "XPQARetrieval (eng-Latn_spa-Latn)": 22.5, - "XPQARetrieval (spa-Latn_eng-Latn)": 42.4, - "XPQARetrieval (fra-Latn_fra-Latn)": 57.17, - "XPQARetrieval (eng-Latn_fra-Latn)": 27.69, - "XPQARetrieval (fra-Latn_eng-Latn)": 47.46, - "XPQARetrieval (hin-Deva_hin-Deva)": 68.15, - "XPQARetrieval (eng-Latn_hin-Deva)": 25.82, - "XPQARetrieval (hin-Deva_eng-Latn)": 63.79, - "XPQARetrieval (ita-Latn_ita-Latn)": 67.71, - "XPQARetrieval (eng-Latn_ita-Latn)": 22.97, - "XPQARetrieval (ita-Latn_eng-Latn)": 46.61, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 69.49, - "XPQARetrieval (eng-Latn_jpn-Hira)": 25.08, - "XPQARetrieval (jpn-Hira_eng-Latn)": 54.6, - "XPQARetrieval (kor-Hang_kor-Hang)": 32.98, - "XPQARetrieval (eng-Latn_kor-Hang)": 22.38, - "XPQARetrieval (kor-Hang_eng-Latn)": 22.99, - "XPQARetrieval (pol-Latn_pol-Latn)": 43.37, - "XPQARetrieval (eng-Latn_pol-Latn)": 19.89, - "XPQARetrieval (pol-Latn_eng-Latn)": 28.72, - "XPQARetrieval (por-Latn_por-Latn)": 41.8, - "XPQARetrieval (eng-Latn_por-Latn)": 15.79, - "XPQARetrieval (por-Latn_eng-Latn)": 33.77, - "XPQARetrieval (tam-Taml_tam-Taml)": 31.65, - "XPQARetrieval (eng-Latn_tam-Taml)": 13.18, - "XPQARetrieval (tam-Taml_eng-Latn)": 26.44, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 63.98, - "XPQARetrieval (eng-Latn_cmn-Hans)": 16.52, - "XPQARetrieval (cmn-Hans_eng-Latn)": 45.32, - "XPQARetrieval (fr)": 57.47 - }, - { - "Model": "multilingual-e5-small", - "LEMBNeedleRetrieval": 30.75, - "LEMBPasskeyRetrieval": 38.25 + "Model": "m3e-base", + "CmedqaRetrieval": 30.33, + "CovidRetrieval": 66.42, + "DuRetrieval": 75.76, + "EcomRetrieval": 50.27, + "MMarcoRetrieval": 65.46, + "MedicalRetrieval": 42.79, + "T2Retrieval": 73.14, + "VideoRetrieval": 51.11 } ] }, "STS": { "cosine_spearman": [ { - "Model": "multilingual-e5-small", - "AFQMC (cmn-Hans)": 25.21, - "ATEC (cmn-Hans)": 35.14, - "BIOSSES": 82.46, - "BQ (cmn-Hans)": 43.27, - "CDSC-R (pol-Latn)": 90.27, - "LCQMC (cmn-Hans)": 72.7, - "PAWSX (cmn-Hans)": 11.0, - "RUParaPhraserSTS (rus-Cyrl)": 70.46, - "RuSTSBenchmarkSTS (rus-Cyrl)": 78.08, - "SICK-R": 77.51, - "SICK-R-PL (pol-Latn)": 69.45, - "SICKFr (fra-Latn)": 74.67, - "STS12": 76.56, - "STS13": 76.97, - "STS14": 75.52, - "STS15": 87.12, - "STS16": 83.63, - "STS17 (ita-Latn_eng-Latn)": 77.31, - "STS17 (eng-Latn_ara-Arab)": 57.39, - "STS17 (en-en)": 86.42, - "STS17 (eng-Latn_tur-Latn)": 55.93, - "STS17 (ara-Arab)": 73.0, - "STS17 (nld-Latn_eng-Latn)": 75.43, - "STS17 (kor-Hang)": 78.87, - "STS17 (fra-Latn_eng-Latn)": 72.28, - "STS17 (spa-Latn)": 84.83, - "STS17 (eng-Latn_deu-Latn)": 76.82, - "STS17 (spa-Latn_eng-Latn)": 72.43, - "STS22 (pol-Latn_eng-Latn)": 72.69, - "STS22 (deu-Latn_eng-Latn)": 56.07, - "STS22 (spa-Latn)": 66.86, - "STS22 (ara-Arab)": 56.65, - "STS22 (rus-Cyrl)": 59.9, - "STS22 (deu-Latn)": 53.45, - "STS22 (cmn-Hans_eng-Latn)": 65.32, - "STS22 (en)": 61.25, - "STS22 (fra-Latn)": 76.58, - "STS22 (ita-Latn)": 76.53, - "STS22 (spa-Latn_ita-Latn)": 71.74, - "STS22 (spa-Latn_eng-Latn)": 74.2, - "STS22 (deu-Latn_fra-Latn)": 60.62, - "STS22 (tur-Latn)": 63.69, - "STS22 (pol-Latn)": 35.78, - "STS22 (fra-Latn_pol-Latn)": 84.52, - "STS22 (cmn-Hans)": 66.85, - "STS22 (deu-Latn_pol-Latn)": 28.24, - "STSB (cmn-Hans)": 77.73, - "STSBenchmark": 84.11, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 78.49, - "STSBenchmarkMultilingualSTS (pol-Latn)": 72.61, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 78.24, - "STSBenchmarkMultilingualSTS (en)": 84.11, - "STSBenchmarkMultilingualSTS (por-Latn)": 77.39, - "STSBenchmarkMultilingualSTS (ita-Latn)": 78.21, - "STSBenchmarkMultilingualSTS (fra-Latn)": 79.2, - "STSBenchmarkMultilingualSTS (deu-Latn)": 79.17, - "STSBenchmarkMultilingualSTS (nld-Latn)": 76.04, - "STSBenchmarkMultilingualSTS (spa-Latn)": 80.31 - }, - { - "Model": "multilingual-e5-small", - "AFQMC (cmn-Hans)": 25.21, - "ATEC (cmn-Hans)": 35.14, - "BIOSSES": 82.46, - "BQ (cmn-Hans)": 43.27, - "CDSC-R (pol-Latn)": 90.27, - "LCQMC (cmn-Hans)": 72.7, - "PAWSX (cmn-Hans)": 11.0, - "RUParaPhraserSTS (rus-Cyrl)": 70.46, - "RuSTSBenchmarkSTS (rus-Cyrl)": 78.08, - "SICK-R": 77.51, - "SICK-R-PL (pol-Latn)": 69.45, - "SICKFr (fra-Latn)": 74.67, - "STS12": 76.56, - "STS13": 76.97, - "STS14": 75.52, - "STS15": 87.12, - "STS16": 83.63, - "STS17 (ita-Latn_eng-Latn)": 77.31, - "STS17 (eng-Latn_ara-Arab)": 57.39, - "STS17 (en-en)": 86.42, - "STS17 (eng-Latn_tur-Latn)": 55.93, - "STS17 (ara-Arab)": 73.0, - "STS17 (nld-Latn_eng-Latn)": 75.43, - "STS17 (kor-Hang)": 78.87, - "STS17 (fra-Latn_eng-Latn)": 72.28, - "STS17 (spa-Latn)": 84.83, - "STS17 (eng-Latn_deu-Latn)": 76.82, - "STS17 (spa-Latn_eng-Latn)": 72.43, - "STS22 (pol-Latn_eng-Latn)": 72.69, - "STS22 (deu-Latn_eng-Latn)": 56.07, - "STS22 (spa-Latn)": 66.86, - "STS22 (ara-Arab)": 56.65, - "STS22 (rus-Cyrl)": 59.9, - "STS22 (deu-Latn)": 53.45, - "STS22 (cmn-Hans_eng-Latn)": 65.32, - "STS22 (en)": 61.25, - "STS22 (fra-Latn)": 76.58, - "STS22 (ita-Latn)": 76.53, - "STS22 (spa-Latn_ita-Latn)": 71.74, - "STS22 (spa-Latn_eng-Latn)": 74.2, - "STS22 (deu-Latn_fra-Latn)": 60.62, - "STS22 (tur-Latn)": 63.69, - "STS22 (pol-Latn)": 35.78, - "STS22 (fra-Latn_pol-Latn)": 84.52, - "STS22 (cmn-Hans)": 66.85, - "STS22 (deu-Latn_pol-Latn)": 28.24, - "STSB (cmn-Hans)": 77.73, - "STSBenchmark": 84.11, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 78.49, - "STSBenchmarkMultilingualSTS (pol-Latn)": 72.61, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 78.24, - "STSBenchmarkMultilingualSTS (en)": 84.11, - "STSBenchmarkMultilingualSTS (por-Latn)": 77.39, - "STSBenchmarkMultilingualSTS (ita-Latn)": 78.21, - "STSBenchmarkMultilingualSTS (fra-Latn)": 79.2, - "STSBenchmarkMultilingualSTS (deu-Latn)": 79.17, - "STSBenchmarkMultilingualSTS (nld-Latn)": 76.04, - "STSBenchmarkMultilingualSTS (spa-Latn)": 80.31 - }, - { - "Model": "multilingual-e5-small", - "AFQMC": 25.21, - "ATEC": 35.14, - "BQ": 43.27, - "CDSC-R": 90.27, - "LCQMC": 72.7, - "PAWSX": 11.01, - "QBQTC": 30.25, - "SICK-R-PL": 69.46, - "SICKFr": 75.62, - "STS22 (pl)": 35.8, - "STSB": 77.73, - "STSBenchmarkMultilingualSTS (fr)": 79.32 + "Model": "m3e-base", + "AFQMC": 35.87, + "ATEC": 41.27, + "BQ": 63.81, + "LCQMC": 74.88, + "PAWSX": 12.19, + "QBQTC": 32.07, + "STS22": 66.73, + "STSB": 76.97 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "multilingual-e5-small", - "SummEval": 30.04, - "SummEvalFr (fra-Latn)": 31.14 - }, - { - "Model": "multilingual-e5-small", - "SummEval": 30.04, - "SummEvalFr (fra-Latn)": 31.14 - }, - { - "Model": "multilingual-e5-small", - "SummEvalFr": 31.85 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "multilingual-e5-small", - "CEDRClassification (rus-Cyrl)": 40.39, - "SensitiveTopicsClassification (rus-Cyrl)": 24.38 - } - ] + "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "multilingual-e5-small", - "Core17InstructionRetrieval": -0.26, - "News21InstructionRetrieval": 0.54, - "Robust04InstructionRetrieval": -7.62 - } - ] + "p-MRR": [] } }, - "distilrubert-small-cased-conversational": { + "m3e-large": { "BitextMining": { - "f1": [ - { - "Model": "distilrubert-small-cased-conversational", - "Tatoeba (rus-Cyrl_eng-Latn)": 24.16 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "distilrubert-small-cased-conversational", - "GeoreviewClassification (rus-Cyrl)": 38.95, - "HeadlineClassification (rus-Cyrl)": 75.59, - "InappropriatenessClassification (rus-Cyrl)": 60.68, - "KinopoiskClassification (rus-Cyrl)": 49.67, - "MassiveIntentClassification (rus-Cyrl)": 63.12, - "MassiveScenarioClassification (rus-Cyrl)": 68.08, - "RuReviewsClassification (rus-Cyrl)": 54.05, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 48.53, - "RuSciBenchOECDClassification (rus-Cyrl)": 37.65 + "Model": "m3e-large", + "AmazonReviewsClassification": 44.44, + "IFlyTek": 43.96, + "JDReview": 86.92, + "MassiveIntentClassification": 67.23, + "MassiveScenarioClassification": 74.88, + "MultilingualSentiment": 72.47, + "OnlineShopping": 89.59, + "TNews": 48.26, + "Waimai": 86.08 } ] }, "Clustering": { "v_measure": [ { - "Model": "distilrubert-small-cased-conversational", - "GeoreviewClusteringP2P (rus-Cyrl)": 43.26, - "MLSUMClusteringP2P (rus-Cyrl)": 50.08, - "MLSUMClusteringS2S (rus-Cyrl)": 51.12, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 37.84, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 34.12 + "Model": "m3e-large", + "CLSClusteringP2P": 38.6, + "CLSClusteringS2S": 38.02, + "ThuNewsClusteringP2P": 60.39, + "ThuNewsClusteringS2S": 58.51 } ] }, "PairClassification": { "max_ap": [ { - "Model": "distilrubert-small-cased-conversational", - "OpusparcusPC (rus-Cyrl)": 84.35, - "TERRa (rus-Cyrl)": 52.48 + "Model": "m3e-large", + "Cmnli": 69.27, + "Ocnli": 59.33 }, { - "Model": "distilrubert-small-cased-conversational", - "OpusparcusPC (rus-Cyrl)": 84.35, - "TERRa (rus-Cyrl)": 53.02 + "Model": "m3e-large", + "Cmnli": 69.27, + "Ocnli": 59.99 } ] }, "Reranking": { "map": [ { - "Model": "distilrubert-small-cased-conversational", - "MIRACLReranking (rus-Cyrl)": 13.09 - }, - { - "Model": "distilrubert-small-cased-conversational", - "RuBQReranking (rus-Cyrl)": 42.58 + "Model": "m3e-large", + "CMedQAv1": 77.76, + "CMedQAv2": 78.27, + "MMarcoReranking": 16.46, + "T2Reranking": 66.13 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "distilrubert-small-cased-conversational", - "MIRACLRetrieval (rus-Cyrl)": 2.39, - "RiaNewsRetrieval (rus-Cyrl)": 4.14, - "RuBQRetrieval (rus-Cyrl)": 10.6 + "Model": "m3e-large", + "CmedqaRetrieval": 30.73, + "CovidRetrieval": 61.33, + "DuRetrieval": 74.69, + "EcomRetrieval": 45.18, + "MMarcoRetrieval": 61.06, + "MedicalRetrieval": 48.66, + "T2Retrieval": 72.36, + "VideoRetrieval": 44.02 } ] }, "STS": { "cosine_spearman": [ { - "Model": "distilrubert-small-cased-conversational", - "RUParaPhraserSTS (rus-Cyrl)": 55.01, - "RuSTSBenchmarkSTS (rus-Cyrl)": 61.72, - "STS22 (rus-Cyrl)": 51.87, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 61.6 + "Model": "m3e-large", + "AFQMC": 36.53, + "ATEC": 41.8, + "BQ": 65.2, + "LCQMC": 74.2, + "PAWSX": 15.95, + "QBQTC": 32.65, + "STS22": 62.91, + "STSB": 74.16 } ] }, @@ -14275,19 +12184,13 @@ "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "distilrubert-small-cased-conversational", - "CEDRClassification (rus-Cyrl)": 36.19, - "SensitiveTopicsClassification (rus-Cyrl)": 22.45 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] } }, - "e5-base-v2": { + "mistral-7b-instruct-v0.2": { "BitextMining": { "f1": [] }, @@ -14295,20 +12198,7 @@ "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "e5-base-v2", - "BiorxivClusteringP2P": 37.12, - "BiorxivClusteringS2S": 33.41, - "MedrxivClusteringP2P": 31.82, - "MedrxivClusteringS2S": 29.68, - "RedditClustering": 56.54, - "RedditClusteringP2P": 63.23, - "StackExchangeClustering": 64.6, - "StackExchangeClusteringP2P": 33.02, - "TwentyNewsgroupsClustering": 49.86 - } - ] + "v_measure": [] }, "PairClassification": { "max_ap": [] @@ -14331,46 +12221,105 @@ "InstructionRetrieval": { "p-MRR": [ { - "Model": "e5-base-v2", - "Core17InstructionRetrieval": -2.9, - "News21InstructionRetrieval": -2.0, - "Robust04InstructionRetrieval": -6.73 + "Model": "mistral-7b-instruct-v0.2", + "Core17InstructionRetrieval": 13.03, + "News21InstructionRetrieval": 4.81, + "Robust04InstructionRetrieval": 12.61 } ] } }, - "gelectra-base": { + "mistral-embed": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "mistral-embed", + "AmazonReviewsClassification": 41.59, + "MTOPDomainClassification": 90.05, + "MTOPIntentClassification": 66.09, + "MasakhaNEWSClassification": 81.4, + "MassiveIntentClassification": 62.83, + "MassiveScenarioClassification": 69.71 + } + ] }, "Clustering": { "v_measure": [ { - "Model": "gelectra-base", - "BlurbsClusteringP2P": 10.06, - "BlurbsClusteringS2S": 7.74, - "TenKGnadClusteringP2P": 9.02, - "TenKGnadClusteringS2S": 4.11 + "Model": "mistral-embed", + "AlloProfClusteringP2P": 62.01, + "AlloProfClusteringS2S": 49.2, + "HALClusteringS2S": 26.17, + "MLSUMClusteringP2P": 45.28, + "MLSUMClusteringS2S": 42.74, + "MasakhaNEWSClusteringP2P": 48.13, + "MasakhaNEWSClusteringS2S": 39.62 } ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "mistral-embed", + "OpusparcusPC": 92.61, + "PawsXPairClassification": 62.02 + }, + { + "Model": "mistral-embed", + "OpusparcusPC": 92.64, + "PawsXPairClassification": 62.05 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "mistral-embed", + "AlloprofReranking": 72.36, + "SyntecReranking": 88.57 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "mistral-embed", + "AILACasedocs": 38.2, + "AILAStatutes": 44.81, + "AlloprofRetrieval": 56.84, + "BSARDRetrieval": 2.48, + "GerDaLIRSmall": 17.85, + "LeCaRDv2": 61.12, + "LegalBenchConsumerContractsQA": 80.8, + "LegalBenchCorporateLobbying": 94.11, + "LegalQuAD": 47.17, + "LegalSummarization": 67.39, + "MintakaRetrieval": 21.73, + "SyntecRetrieval": 78.77, + "XPQARetrieval": 74.24 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "mistral-embed", + "SICKFr": 76.21, + "STS22": 82.74, + "STSBenchmarkMultilingualSTS": 79.72 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "mistral-embed", + "SummEvalFr": 31.47 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -14418,7 +12367,7 @@ ] } }, - "text-search-ada-001": { + "monot5-3b-msmarco-10k": { "BitextMining": { "f1": [] }, @@ -14426,14 +12375,7 @@ "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "text-search-ada-001", - "BiorxivClusteringS2S": 26.05, - "MedrxivClusteringS2S": 25.67, - "TwentyNewsgroupsClustering": 44.92 - } - ] + "v_measure": [] }, "PairClassification": { "max_ap": [] @@ -14442,25 +12384,7 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "text-search-ada-001", - "ArguAna": 46.91, - "ClimateFEVER": 18.5, - "DBPedia": 36.2, - "FEVER": 72.1, - "FiQA2018": 38.41, - "HotpotQA": 59.39, - "MSMARCO": 37.94, - "NFCorpus": 33.17, - "NQ": 42.81, - "QuoraRetrieval": 70.57, - "SCIDOCS": 14.83, - "SciFact": 67.25, - "TRECCOVID": 72.43, - "Touche2020": 28.68 - } - ] + "ndcg_at_10": [] }, "STS": { "cosine_spearman": [] @@ -14472,10 +12396,17 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "monot5-3b-msmarco-10k", + "Core17InstructionRetrieval": 1.84, + "News21InstructionRetrieval": 1.78, + "Robust04InstructionRetrieval": 3.96 + } + ] } }, - "bge-base-en-v1.5": { + "monot5-base-msmarco-10k": { "BitextMining": { "f1": [] }, @@ -14483,20 +12414,7 @@ "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "bge-base-en-v1.5", - "BiorxivClusteringP2P": 39.44, - "BiorxivClusteringS2S": 36.62, - "MedrxivClusteringP2P": 33.21, - "MedrxivClusteringS2S": 31.68, - "RedditClustering": 56.61, - "RedditClusteringP2P": 62.66, - "StackExchangeClustering": 66.11, - "StackExchangeClusteringP2P": 35.24, - "TwentyNewsgroupsClustering": 50.75 - } - ] + "v_measure": [] }, "PairClassification": { "max_ap": [] @@ -14505,26 +12423,7 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "bge-base-en-v1.5", - "ARCChallenge": 9.66, - "AlphaNLI": 10.99, - "HellaSwag": 26.64, - "PIQA": 25.69, - "Quail": 1.42, - "RARbCode": 46.47, - "RARbMath": 46.86, - "SIQA": 0.94, - "SpartQA": 3.37, - "TempReasonL1": 1.07, - "TempReasonL2Fact": 17.23, - "TempReasonL2Pure": 1.29, - "TempReasonL3Fact": 13.36, - "TempReasonL3Pure": 5.2, - "WinoGrande": 13.76 - } - ] + "ndcg_at_10": [] }, "STS": { "cosine_spearman": [] @@ -14536,266 +12435,128 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "monot5-base-msmarco-10k", + "Core17InstructionRetrieval": -4.06, + "News21InstructionRetrieval": 5.02, + "Robust04InstructionRetrieval": -6.2 + } + ] } }, - "allenai-specter": { + "msmarco-bert-co-condensor": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "allenai-specter", - "AmazonCounterfactualClassification (de)": 54.46, - "AmazonCounterfactualClassification (en)": 58.7, - "AmazonCounterfactualClassification (en-ext)": 59.28, - "AmazonCounterfactualClassification (ja)": 43.87, - "AmazonPolarityClassification": 57.77, - "AmazonReviewsClassification (de)": 24.08, - "AmazonReviewsClassification (en)": 26.26, - "AmazonReviewsClassification (es)": 23.88, - "AmazonReviewsClassification (fr)": 23.31, - "AmazonReviewsClassification (ja)": 20.25, - "AmazonReviewsClassification (zh)": 20.49, - "Banking77Classification": 66.66, - "EmotionClassification": 24.82, - "ImdbClassification": 56.35, - "MTOPDomainClassification (de)": 48.55, - "MTOPDomainClassification (en)": 74.53, - "MTOPDomainClassification (es)": 58.39, - "MTOPDomainClassification (fr)": 54.61, - "MTOPDomainClassification (hi)": 21.22, - "MTOPDomainClassification (th)": 14.98, - "MTOPIntentClassification (de)": 35.55, - "MTOPIntentClassification (en)": 50.05, - "MTOPIntentClassification (es)": 36.72, - "MTOPIntentClassification (fr)": 34.71, - "MTOPIntentClassification (hi)": 4.44, - "MTOPIntentClassification (th)": 4.67, - "MassiveIntentClassification (af)": 33.68, - "MassiveIntentClassification (am)": 2.94, - "MassiveIntentClassification (ar)": 10.04, - "MassiveIntentClassification (az)": 30.74, - "MassiveIntentClassification (bn)": 3.02, - "MassiveIntentClassification (cy)": 33.94, - "MassiveIntentClassification (da)": 38.47, - "MassiveIntentClassification (de)": 36.06, - "MassiveIntentClassification (el)": 27.7, - "MassiveIntentClassification (en)": 51.73, - "MassiveIntentClassification (es)": 35.6, - "MassiveIntentClassification (fa)": 17.97, - "MassiveIntentClassification (fi)": 35.53, - "MassiveIntentClassification (fr)": 38.41, - "MassiveIntentClassification (he)": 2.69, - "MassiveIntentClassification (hi)": 3.43, - "MassiveIntentClassification (hu)": 34.05, - "MassiveIntentClassification (hy)": 3.11, - "MassiveIntentClassification (id)": 40.02, - "MassiveIntentClassification (is)": 32.63, - "MassiveIntentClassification (it)": 39.28, - "MassiveIntentClassification (ja)": 4.95, - "MassiveIntentClassification (jv)": 34.95, - "MassiveIntentClassification (ka)": 2.57, - "MassiveIntentClassification (km)": 4.73, - "MassiveIntentClassification (kn)": 3.54, - "MassiveIntentClassification (ko)": 2.68, - "MassiveIntentClassification (lv)": 37.91, - "MassiveIntentClassification (ml)": 2.88, - "MassiveIntentClassification (mn)": 16.94, - "MassiveIntentClassification (ms)": 36.6, - "MassiveIntentClassification (my)": 3.96, - "MassiveIntentClassification (nb)": 34.75, - "MassiveIntentClassification (nl)": 33.95, - "MassiveIntentClassification (pl)": 35.77, - "MassiveIntentClassification (pt)": 43.05, - "MassiveIntentClassification (ro)": 36.2, - "MassiveIntentClassification (ru)": 25.3, - "MassiveIntentClassification (sl)": 35.9, - "MassiveIntentClassification (sq)": 36.6, - "MassiveIntentClassification (sv)": 36.0, - "MassiveIntentClassification (sw)": 34.81, - "MassiveIntentClassification (ta)": 3.11, - "MassiveIntentClassification (te)": 2.53, - "MassiveIntentClassification (th)": 4.38, - "MassiveIntentClassification (tl)": 35.51, - "MassiveIntentClassification (tr)": 32.02, - "MassiveIntentClassification (ur)": 9.61, - "MassiveIntentClassification (vi)": 37.07, - "MassiveIntentClassification (zh-CN)": 2.81, - "MassiveIntentClassification (zh-TW)": 4.79, - "MassiveScenarioClassification (af)": 36.17, - "MassiveScenarioClassification (am)": 7.64, - "MassiveScenarioClassification (ar)": 15.26, - "MassiveScenarioClassification (az)": 30.73, - "MassiveScenarioClassification (bn)": 7.15, - "MassiveScenarioClassification (cy)": 34.73, - "MassiveScenarioClassification (da)": 39.93, - "MassiveScenarioClassification (de)": 38.62, - "MassiveScenarioClassification (el)": 27.18, - "MassiveScenarioClassification (en)": 58.58, - "MassiveScenarioClassification (es)": 39.44, - "MassiveScenarioClassification (fa)": 21.43, - "MassiveScenarioClassification (fi)": 33.21, - "MassiveScenarioClassification (fr)": 40.26, - "MassiveScenarioClassification (he)": 7.42, - "MassiveScenarioClassification (hi)": 8.06, - "MassiveScenarioClassification (hu)": 34.54, - "MassiveScenarioClassification (hy)": 8.61, - "MassiveScenarioClassification (id)": 40.04, - "MassiveScenarioClassification (is)": 33.57, - "MassiveScenarioClassification (it)": 40.1, - "MassiveScenarioClassification (ja)": 9.96, - "MassiveScenarioClassification (jv)": 36.11, - "MassiveScenarioClassification (ka)": 7.13, - "MassiveScenarioClassification (km)": 9.66, - "MassiveScenarioClassification (kn)": 7.55, - "MassiveScenarioClassification (ko)": 7.27, - "MassiveScenarioClassification (lv)": 37.03, - "MassiveScenarioClassification (ml)": 7.22, - "MassiveScenarioClassification (mn)": 21.53, - "MassiveScenarioClassification (ms)": 37.57, - "MassiveScenarioClassification (my)": 9.54, - "MassiveScenarioClassification (nb)": 35.71, - "MassiveScenarioClassification (nl)": 34.62, - "MassiveScenarioClassification (pl)": 36.87, - "MassiveScenarioClassification (pt)": 44.68, - "MassiveScenarioClassification (ro)": 37.29, - "MassiveScenarioClassification (ru)": 28.16, - "MassiveScenarioClassification (sl)": 37.95, - "MassiveScenarioClassification (sq)": 37.82, - "MassiveScenarioClassification (sv)": 35.35, - "MassiveScenarioClassification (sw)": 35.37, - "MassiveScenarioClassification (ta)": 7.19, - "MassiveScenarioClassification (te)": 7.29, - "MassiveScenarioClassification (th)": 9.47, - "MassiveScenarioClassification (tl)": 37.31, - "MassiveScenarioClassification (tr)": 34.57, - "MassiveScenarioClassification (ur)": 16.17, - "MassiveScenarioClassification (vi)": 35.91, - "MassiveScenarioClassification (zh-CN)": 9.19, - "MassiveScenarioClassification (zh-TW)": 10.19, - "ToxicConversationsClassification": 57.44, - "TweetSentimentExtractionClassification": 45.52 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "allenai-specter", - "ArxivClusteringP2P": 44.75, - "ArxivClusteringS2S": 35.27, - "BiorxivClusteringP2P": 39.52, - "BiorxivClusteringS2S": 34.53, - "MedrxivClusteringP2P": 35.04, - "MedrxivClusteringS2S": 31.66, - "RedditClustering": 24.13, - "RedditClusteringP2P": 35.06, - "StackExchangeClustering": 39.01, - "StackExchangeClusteringP2P": 31.46, - "TwentyNewsgroupsClustering": 24.22 + "Model": "msmarco-bert-co-condensor", + "AmazonCounterfactualClassification": 64.06, + "AmazonPolarityClassification": 66.88, + "AmazonReviewsClassification": 34.85, + "Banking77Classification": 82.35, + "EmotionClassification": 41.91, + "ImdbClassification": 60.17, + "MTOPDomainClassification": 91.34, + "MTOPIntentClassification": 71.07, + "MassiveIntentClassification": 70.4, + "MassiveScenarioClassification": 73.73, + "ToxicConversationsClassification": 64.01, + "TweetSentimentExtractionClassification": 55.74 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "msmarco-bert-co-condensor", + "ArxivClusteringP2P": 36.94, + "ArxivClusteringS2S": 29.03, + "BiorxivClusteringP2P": 32.35, + "BiorxivClusteringS2S": 28.16, + "MedrxivClusteringP2P": 30.23, + "MedrxivClusteringS2S": 27.01, + "RedditClustering": 48.04, + "RedditClusteringP2P": 53.53, + "StackExchangeClustering": 59.54, + "StackExchangeClusteringP2P": 30.48, + "TwentyNewsgroupsClustering": 38.68 } ] }, "PairClassification": { "max_ap": [ { - "Model": "allenai-specter", - "SprintDuplicateQuestions": 71.63, - "TwitterSemEval2015": 43.25, - "TwitterURLCorpus": 69.22 + "Model": "msmarco-bert-co-condensor", + "SprintDuplicateQuestions": 96.09, + "TwitterSemEval2015": 65.95, + "TwitterURLCorpus": 83.17 }, { - "Model": "allenai-specter", - "SprintDuplicateQuestions": 71.63, - "TwitterSemEval2015": 43.25, - "TwitterURLCorpus": 69.22 + "Model": "msmarco-bert-co-condensor", + "SprintDuplicateQuestions": 96.09, + "TwitterSemEval2015": 65.95, + "TwitterURLCorpus": 83.17 } ] }, "Reranking": { "map": [ { - "Model": "allenai-specter", - "AskUbuntuDupQuestions": 50.07, - "MindSmallReranking": 24.8, - "SciDocsRR": 81.31, - "StackOverflowDupQuestions": 36.22 + "Model": "msmarco-bert-co-condensor", + "AskUbuntuDupQuestions": 58.99, + "MindSmallReranking": 27.13, + "SciDocsRR": 72.78, + "StackOverflowDupQuestions": 48.48 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "allenai-specter", - "ArguAna": 32.67, - "CQADupstackRetrieval": 14.6, - "ClimateFEVER": 6.86, - "DBPedia": 4.14, - "FEVER": 5.45, - "FiQA2018": 5.64, - "HotpotQA": 5.46, - "MSMARCO": 5.59, - "NFCorpus": 0.85, - "NQ": 5.99, - "QuoraRetrieval": 64.65, - "SCIDOCS": 0.0, - "SciFact": 47.88, - "TRECCOVID": 29.91, - "Touche2020": 8.46 + "Model": "msmarco-bert-co-condensor", + "ArguAna": 45.15, + "CQADupstackRetrieval": 27.72, + "ClimateFEVER": 16.96, + "DBPedia": 27.86, + "FEVER": 45.68, + "FiQA2018": 15.62, + "HotpotQA": 35.61, + "MSMARCO": 29.57, + "NFCorpus": 22.29, + "NQ": 29.85, + "QuoraRetrieval": 86.51, + "SCIDOCS": 10.13, + "SciFact": 52.31, + "TRECCOVID": 40.54, + "Touche2020": 8.57 } ] }, "STS": { "cosine_spearman": [ { - "Model": "allenai-specter", - "BIOSSES": 64.95, - "SICK-R": 56.39, - "STS12": 62.49, - "STS13": 58.7, - "STS14": 54.87, - "STS15": 62.54, - "STS16": 64.27, - "STS17 (ar-ar)": 27.14, - "STS17 (en-ar)": 6.9, - "STS17 (en-de)": 11.59, - "STS17 (en-en)": 69.63, - "STS17 (en-tr)": 6.46, - "STS17 (es-en)": 10.86, - "STS17 (es-es)": 55.45, - "STS17 (fr-en)": 16.02, - "STS17 (it-en)": 19.87, - "STS17 (ko-ko)": 8.08, - "STS17 (nl-en)": 24.92, - "STS22 (ar)": 19.57, - "STS22 (de)": 17.31, - "STS22 (de-en)": 26.03, - "STS22 (de-fr)": 10.26, - "STS22 (de-pl)": 16.94, - "STS22 (en)": 55.06, - "STS22 (es)": 48.89, - "STS22 (es-en)": 51.79, - "STS22 (es-it)": 25.24, - "STS22 (fr)": 53.92, - "STS22 (fr-pl)": 39.44, - "STS22 (it)": 39.43, - "STS22 (pl)": 13.56, - "STS22 (pl-en)": 25.36, - "STS22 (ru)": 1.11, - "STS22 (tr)": 31.73, - "STS22 (zh)": 16.35, - "STS22 (zh-en)": 8.44, - "STSBenchmark": 61.26 + "Model": "msmarco-bert-co-condensor", + "BIOSSES": 77.32, + "SICK-R": 72.0, + "STS12": 68.19, + "STS13": 80.4, + "STS14": 74.02, + "STS15": 82.57, + "STS16": 79.78, + "STS17": 85.94, + "STS22": 67.54, + "STSBenchmark": 76.97 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "allenai-specter", - "SummEval": 27.66 + "Model": "msmarco-bert-co-condensor", + "SummEval": 29.5 } ] }, @@ -14806,116 +12567,90 @@ "p-MRR": [] } }, - "titan-embed-text-v1": { + "multi-qa-MiniLM-L6-cos-v1": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "titan-embed-text-v1", - "AmazonCounterfactualClassification (en)": 61.85, - "Banking77Classification": 83.21 + "Model": "multi-qa-MiniLM-L6-cos-v1", + "AmazonReviewsClassification": 27.05, + "MTOPDomainClassification": 72.97, + "MTOPIntentClassification": 37.18, + "MasakhaNEWSClassification": 75.62, + "MassiveIntentClassification": 42.64, + "MassiveScenarioClassification": 49.92 } ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "multi-qa-MiniLM-L6-cos-v1", + "AlloProfClusteringP2P": 49.13, + "AlloProfClusteringS2S": 26.16, + "HALClusteringS2S": 12.49, + "MLSUMClusteringP2P": 35.15, + "MLSUMClusteringS2S": 25.95, + "MasakhaNEWSClusteringP2P": 53.73, + "MasakhaNEWSClusteringS2S": 27.27 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "multi-qa-MiniLM-L6-cos-v1", + "OpusparcusPC": 88.07, + "PawsXPairClassification": 57.36 + }, + { + "Model": "multi-qa-MiniLM-L6-cos-v1", + "OpusparcusPC": 88.07, + "PawsXPairClassification": 57.48 + } + ] }, "Reranking": { "map": [ { - "Model": "titan-embed-text-v1", - "SciDocsRR": 88.87 + "Model": "multi-qa-MiniLM-L6-cos-v1", + "AlloprofReranking": 40.28, + "SyntecReranking": 65.08 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "titan-embed-text-v1", - "ArguAna": 48.83, - "FiQA2018": 40.38, - "MSMARCO": 35.19, - "NQ": 51.08, - "SciFact": 73.5, - "TRECCOVID": 54.74 + "Model": "multi-qa-MiniLM-L6-cos-v1", + "AlloprofRetrieval": 30.23, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 16.31, + "SyntecRetrieval": 58.07, + "XPQARetrieval": 48.83 } ] }, "STS": { "cosine_spearman": [ { - "Model": "titan-embed-text-v1", - "BIOSSES": 84.17, - "SICK-R": 73.05, - "STS12": 66.59, - "STS13": 83.24, - "STS14": 73.71, - "STS15": 82.4, - "STS16": NaN, - "STS17 (en-en)": 80.9, - "STSBenchmark": 74.85 + "Model": "multi-qa-MiniLM-L6-cos-v1", + "SICKFr": 62.11, + "STS22": 74.62, + "STSBenchmarkMultilingualSTS": 63.85 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "bge-large-en-v1.5-instruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "cosine_spearman": [ { - "Model": "bge-large-en-v1.5-instruct", - "ARCChallenge": 8.86, - "AlphaNLI": 0.86, - "HellaSwag": 26.24, - "PIQA": 23.26, - "Quail": 2.72, - "RARbCode": 45.25, - "RARbMath": 49.82, - "SIQA": 0.59, - "SpartQA": 2.34, - "TempReasonL1": 1.17, - "TempReasonL2Fact": 21.19, - "TempReasonL2Pure": 2.1, - "TempReasonL3Fact": 17.59, - "TempReasonL3Pure": 5.99, - "WinoGrande": 10.31 + "Model": "multi-qa-MiniLM-L6-cos-v1", + "SummEvalFr": 27.59 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, @@ -14923,540 +12658,2345 @@ "p-MRR": [] } }, - "nomic-embed-text-v1.5-256": { + "multilingual-e5-base": { "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [ + "f1": [ { - "Model": "nomic-embed-text-v1.5-256", - "AmazonCounterfactualClassification (en)": 72.94, - "AmazonPolarityClassification": 91.35, - "AmazonReviewsClassification (en)": 45.73, - "Banking77Classification": 83.69, - "EmotionClassification": 45.88, - "ImdbClassification": 83.99, - "MTOPDomainClassification (en)": 91.68, - "MTOPIntentClassification (en)": 72.47, - "MassiveIntentClassification (en)": 71.76, - "MassiveScenarioClassification (en)": 75.67, - "ToxicConversationsClassification": 70.87, - "TweetSentimentExtractionClassification": 59.2 + "Model": "multilingual-e5-base", + "BornholmBitextMining": 33.22, + "Tatoeba (csb-eng)": 24.29, + "Tatoeba (spa-eng)": 96.97, + "Tatoeba (kzj-eng)": 6.26, + "Tatoeba (uzb-eng)": 62.63, + "Tatoeba (mal-eng)": 96.72, + "Tatoeba (dtp-eng)": 5.13, + "Tatoeba (ces-eng)": 88.75, + "Tatoeba (mon-eng)": 78.37, + "Tatoeba (fry-eng)": 50.82, + "Tatoeba (cbk-eng)": 60.66, + "Tatoeba (awa-eng)": 68.39, + "Tatoeba (cha-eng)": 16.95, + "Tatoeba (nld-eng)": 93.2, + "Tatoeba (mhr-eng)": 5.52, + "Tatoeba (cmn-eng)": 93.35, + "Tatoeba (arq-eng)": 26.61, + "Tatoeba (kor-eng)": 83.37, + "Tatoeba (pol-eng)": 94.57, + "Tatoeba (uig-eng)": 62.97, + "Tatoeba (zsm-eng)": 92.45, + "Tatoeba (tuk-eng)": 19.67, + "Tatoeba (ind-eng)": 90.26, + "Tatoeba (aze-eng)": 84.71, + "Tatoeba (xho-eng)": 73.24, + "Tatoeba (war-eng)": 47.18, + "Tatoeba (ast-eng)": 74.36, + "Tatoeba (ido-eng)": 74.41, + "Tatoeba (dsb-eng)": 34.36, + "Tatoeba (est-eng)": 70.64, + "Tatoeba (ina-eng)": 86.11, + "Tatoeba (bel-eng)": 86.7, + "Tatoeba (fin-eng)": 86.15, + "Tatoeba (ang-eng)": 29.87, + "Tatoeba (gle-eng)": 58.62, + "Tatoeba (slk-eng)": 86.42, + "Tatoeba (hye-eng)": 85.85, + "Tatoeba (tgl-eng)": 83.78, + "Tatoeba (pam-eng)": 6.92, + "Tatoeba (eus-eng)": 56.26, + "Tatoeba (slv-eng)": 81.93, + "Tatoeba (nno-eng)": 82.67, + "Tatoeba (wuu-eng)": 78.65, + "Tatoeba (nds-eng)": 53.86, + "Tatoeba (deu-eng)": 97.07, + "Tatoeba (ita-eng)": 90.61, + "Tatoeba (rus-eng)": 91.78, + "Tatoeba (bos-eng)": 88.86, + "Tatoeba (glg-eng)": 82.69, + "Tatoeba (kaz-eng)": 75.56, + "Tatoeba (nov-eng)": 66.96, + "Tatoeba (tam-eng)": 85.12, + "Tatoeba (dan-eng)": 91.23, + "Tatoeba (oci-eng)": 35.79, + "Tatoeba (arz-eng)": 66.79, + "Tatoeba (gsw-eng)": 43.53, + "Tatoeba (jav-eng)": 61.25, + "Tatoeba (sqi-eng)": 90.06, + "Tatoeba (vie-eng)": 94.55, + "Tatoeba (lit-eng)": 75.53, + "Tatoeba (isl-eng)": 76.9, + "Tatoeba (hsb-eng)": 40.36, + "Tatoeba (hun-eng)": 84.41, + "Tatoeba (kab-eng)": 21.77, + "Tatoeba (ceb-eng)": 45.46, + "Tatoeba (ber-eng)": 23.59, + "Tatoeba (jpn-eng)": 90.3, + "Tatoeba (max-eng)": 52.4, + "Tatoeba (ara-eng)": 82.86, + "Tatoeba (nob-eng)": 95.9, + "Tatoeba (cat-eng)": 84.09, + "Tatoeba (orv-eng)": 16.0, + "Tatoeba (cor-eng)": 4.38, + "Tatoeba (tel-eng)": 88.49, + "Tatoeba (ell-eng)": 89.96, + "Tatoeba (bre-eng)": 5.44, + "Tatoeba (swg-eng)": 42.33, + "Tatoeba (pes-eng)": 87.18, + "Tatoeba (cym-eng)": 65.69, + "Tatoeba (heb-eng)": 74.26, + "Tatoeba (urd-eng)": 86.2, + "Tatoeba (amh-eng)": 74.93, + "Tatoeba (lvs-eng)": 76.76, + "Tatoeba (swe-eng)": 91.33, + "Tatoeba (ukr-eng)": 88.29, + "Tatoeba (gla-eng)": 43.08, + "Tatoeba (mar-eng)": 86.62, + "Tatoeba (khm-eng)": 47.27, + "Tatoeba (tat-eng)": 66.92, + "Tatoeba (lat-eng)": 39.62, + "Tatoeba (pms-eng)": 44.61, + "Tatoeba (hrv-eng)": 92.5, + "Tatoeba (mkd-eng)": 73.76, + "Tatoeba (bul-eng)": 88.95, + "Tatoeba (kat-eng)": 77.83, + "Tatoeba (hin-eng)": 93.13, + "Tatoeba (fao-eng)": 64.72, + "Tatoeba (ben-eng)": 81.05, + "Tatoeba (epo-eng)": 92.07, + "Tatoeba (por-eng)": 92.74, + "Tatoeba (tur-eng)": 92.54, + "Tatoeba (yue-eng)": 80.66, + "Tatoeba (srp-eng)": 89.08, + "Tatoeba (swh-eng)": 66.81, + "Tatoeba (tha-eng)": 94.22, + "Tatoeba (ron-eng)": 91.27, + "Tatoeba (fra-eng)": 92.76, + "Tatoeba (yid-eng)": 63.2, + "Tatoeba (afr-eng)": 87.04, + "Tatoeba (tzl-eng)": 34.44, + "Tatoeba (lfn-eng)": 52.85, + "Tatoeba (ile-eng)": 72.56, + "Tatoeba (kur-eng)": 52.96 } ] }, - "Clustering": { - "v_measure": [ + "Classification": { + "accuracy": [ { - "Model": "nomic-embed-text-v1.5-256", - "ArxivClusteringP2P": 44.82, - "ArxivClusteringS2S": 35.32, - "BiorxivClusteringP2P": 38.19, - "BiorxivClusteringS2S": 31.83, - "MedrxivClusteringP2P": 34.08, - "MedrxivClusteringS2S": 30.98, - "RedditClustering": 54.92, - "RedditClusteringP2P": 60.23, - "StackExchangeClustering": 61.81, - "StackExchangeClusteringP2P": 34.03, - "TwentyNewsgroupsClustering": 48.56 + "Model": "multilingual-e5-base", + "AllegroReviews": 40.78, + "AmazonCounterfactualClassification (en-ext)": 76.91, + "AmazonCounterfactualClassification (en)": 77.36, + "AmazonCounterfactualClassification (de)": 70.81, + "AmazonCounterfactualClassification (ja)": 72.02, + "AmazonPolarityClassification": 91.76, + "AmazonReviewsClassification": 40.94, + "AmazonReviewsClassification (en)": 47.54, + "AmazonReviewsClassification (de)": 44.37, + "AmazonReviewsClassification (es)": 43.38, + "AmazonReviewsClassification (fr)": 41.55, + "AmazonReviewsClassification (ja)": 39.57, + "AmazonReviewsClassification (zh)": 38.34, + "AngryTweetsClassification": 56.28, + "Banking77Classification": 73.53, + "CBD": 62.6, + "DKHateClassification": 63.53, + "DanishPoliticalCommentsClassification": 36.41, + "EmotionClassification": 45.68, + "GeoreviewClassification": 46.05, + "HeadlineClassification": 75.64, + "IFlyTek": 40.81, + "ImdbClassification": 84.29, + "InappropriatenessClassification": 58.78, + "JDReview": 75.72, + "KinopoiskClassification": 50.89, + "LccSentimentClassification": 60.13, + "MTOPDomainClassification": 84.79, + "MTOPDomainClassification (en)": 90.9, + "MTOPDomainClassification (de)": 87.94, + "MTOPDomainClassification (es)": 85.96, + "MTOPDomainClassification (fr)": 82.88, + "MTOPDomainClassification (hi)": 83.92, + "MTOPDomainClassification (th)": 83.94, + "MTOPIntentClassification": 55.51, + "MTOPIntentClassification (en)": 61.6, + "MTOPIntentClassification (de)": 61.05, + "MTOPIntentClassification (es)": 55.36, + "MTOPIntentClassification (fr)": 52.23, + "MTOPIntentClassification (hi)": 53.93, + "MTOPIntentClassification (th)": 58.69, + "MasakhaNEWSClassification": 79.69, + "MasakhaNEWSClassification (amh)": 83.8, + "MasakhaNEWSClassification (eng)": 76.49, + "MasakhaNEWSClassification (fra)": 76.35, + "MasakhaNEWSClassification (hau)": 74.63, + "MasakhaNEWSClassification (ibo)": 64.59, + "MasakhaNEWSClassification (lin)": 70.57, + "MasakhaNEWSClassification (lug)": 68.12, + "MasakhaNEWSClassification (orm)": 71.75, + "MasakhaNEWSClassification (pcm)": 91.05, + "MasakhaNEWSClassification (run)": 73.35, + "MasakhaNEWSClassification (sna)": 84.17, + "MasakhaNEWSClassification (som)": 60.1, + "MasakhaNEWSClassification (swa)": 70.74, + "MasakhaNEWSClassification (tir)": 67.1, + "MasakhaNEWSClassification (xho)": 76.03, + "MasakhaNEWSClassification (yor)": 72.75, + "MassiveIntentClassification": 61.04, + "MassiveIntentClassification (es)": 61.13, + "MassiveIntentClassification (de)": 59.82, + "MassiveIntentClassification (mn)": 46.8, + "MassiveIntentClassification (sq)": 51.07, + "MassiveIntentClassification (sv)": 62.43, + "MassiveIntentClassification (tr)": 60.69, + "MassiveIntentClassification (hy)": 48.77, + "MassiveIntentClassification (az)": 51.36, + "MassiveIntentClassification (da)": 60.69, + "MassiveIntentClassification (my)": 46.67, + "MassiveIntentClassification (th)": 59.63, + "MassiveIntentClassification (sl)": 53.84, + "MassiveIntentClassification (sw)": 45.24, + "MassiveIntentClassification (pl)": 60.98, + "MassiveIntentClassification (bn)": 51.69, + "MassiveIntentClassification (nb)": 60.06, + "MassiveIntentClassification (fi)": 58.91, + "MassiveIntentClassification (jv)": 43.23, + "MassiveIntentClassification (id)": 58.7, + "MassiveIntentClassification (ka)": 37.56, + "MassiveIntentClassification (af)": 49.82, + "MassiveIntentClassification (km)": 32.14, + "MassiveIntentClassification (lv)": 51.17, + "MassiveIntentClassification (tl)": 48.99, + "MassiveIntentClassification (ko)": 59.97, + "MassiveIntentClassification (am)": 42.4, + "MassiveIntentClassification (ar)": 50.2, + "MassiveIntentClassification (el)": 58.07, + "MassiveIntentClassification (fa)": 59.51, + "MassiveIntentClassification (hi)": 56.75, + "MassiveIntentClassification (fr)": 61.32, + "MassiveIntentClassification (is)": 44.52, + "MassiveIntentClassification (te)": 50.09, + "MassiveIntentClassification (ja)": 62.3, + "MassiveIntentClassification (vi)": 59.61, + "MassiveIntentClassification (hu)": 57.69, + "MassiveIntentClassification (en)": 65.71, + "MassiveIntentClassification (ms)": 52.85, + "MassiveIntentClassification (zh-CN)": 63.22, + "MassiveIntentClassification (ro)": 56.83, + "MassiveIntentClassification (he)": 55.3, + "MassiveIntentClassification (ur)": 51.3, + "MassiveIntentClassification (pt)": 62.12, + "MassiveIntentClassification (it)": 61.29, + "MassiveIntentClassification (ta)": 48.93, + "MassiveIntentClassification (cy)": 37.05, + "MassiveIntentClassification (kn)": 48.63, + "MassiveIntentClassification (nl)": 61.23, + "MassiveIntentClassification (ml)": 53.75, + "MassiveIntentClassification (zh-TW)": 56.4, + "MassiveIntentClassification (ru)": 62.78, + "MassiveScenarioClassification": 66.11, + "MassiveScenarioClassification (az)": 55.15, + "MassiveScenarioClassification (fr)": 67.37, + "MassiveScenarioClassification (ko)": 67.9, + "MassiveScenarioClassification (da)": 67.97, + "MassiveScenarioClassification (fi)": 64.94, + "MassiveScenarioClassification (ro)": 63.5, + "MassiveScenarioClassification (zh-TW)": 63.73, + "MassiveScenarioClassification (ar)": 58.0, + "MassiveScenarioClassification (kn)": 53.49, + "MassiveScenarioClassification (nl)": 68.62, + "MassiveScenarioClassification (sq)": 57.92, + "MassiveScenarioClassification (ta)": 53.86, + "MassiveScenarioClassification (am)": 50.33, + "MassiveScenarioClassification (ml)": 59.89, + "MassiveScenarioClassification (hu)": 65.75, + "MassiveScenarioClassification (vi)": 66.35, + "MassiveScenarioClassification (tl)": 54.36, + "MassiveScenarioClassification (ka)": 43.38, + "MassiveScenarioClassification (sl)": 58.3, + "MassiveScenarioClassification (cy)": 43.84, + "MassiveScenarioClassification (it)": 66.17, + "MassiveScenarioClassification (is)": 53.28, + "MassiveScenarioClassification (fa)": 63.92, + "MassiveScenarioClassification (pt)": 65.49, + "MassiveScenarioClassification (mn)": 51.87, + "MassiveScenarioClassification (lv)": 56.42, + "MassiveScenarioClassification (ru)": 68.21, + "MassiveScenarioClassification (bn)": 57.0, + "MassiveScenarioClassification (en)": 71.57, + "MassiveScenarioClassification (hy)": 53.63, + "MassiveScenarioClassification (tr)": 65.18, + "MassiveScenarioClassification (ja)": 69.89, + "MassiveScenarioClassification (nb)": 66.57, + "MassiveScenarioClassification (sv)": 69.35, + "MassiveScenarioClassification (id)": 63.6, + "MassiveScenarioClassification (th)": 67.37, + "MassiveScenarioClassification (de)": 68.4, + "MassiveScenarioClassification (jv)": 51.94, + "MassiveScenarioClassification (sw)": 52.64, + "MassiveScenarioClassification (ms)": 58.35, + "MassiveScenarioClassification (es)": 66.47, + "MassiveScenarioClassification (te)": 54.24, + "MassiveScenarioClassification (he)": 62.53, + "MassiveScenarioClassification (my)": 50.77, + "MassiveScenarioClassification (af)": 58.95, + "MassiveScenarioClassification (el)": 65.38, + "MassiveScenarioClassification (km)": 38.45, + "MassiveScenarioClassification (pl)": 66.12, + "MassiveScenarioClassification (ur)": 56.74, + "MassiveScenarioClassification (hi)": 62.91, + "MassiveScenarioClassification (zh-CN)": 70.24, + "MultilingualSentiment": 67.56, + "NoRecClassification": 53.74, + "NordicLangClassification": 75.85, + "NorwegianParliament": 59.94, + "OnlineShopping": 88.66, + "PAC": 70.87, + "PolEmo2.0-IN": 67.59, + "PolEmo2.0-OUT": 43.93, + "RuReviewsClassification": 62.99, + "RuSciBenchGRNTIClassification": 56.28, + "RuSciBenchOECDClassification": 42.69, + "ScalaDaClassification": 50.79, + "ScalaNbClassification": 50.32, + "TNews": 47.52, + "ToxicConversationsClassification": 64.33, + "TweetSentimentExtractionClassification": 62.8, + "Waimai": 85.98 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "multilingual-e5-base", + "8TagsClustering": 24.97, + "AlloProfClusteringP2P": 62.09, + "AlloProfClusteringS2S": 32.98, + "ArxivClusteringP2P": 43.35, + "ArxivClusteringS2S": 36.0, + "BiorxivClusteringP2P": 37.55, + "BiorxivClusteringS2S": 30.33, + "CLSClusteringP2P": 32.41, + "CLSClusteringS2S": 36.99, + "GeoreviewClusteringP2P": 54.46, + "HALClusteringS2S": 22.48, + "MLSUMClusteringP2P": 43.48, + "MLSUMClusteringP2P (ru)": 43.47, + "MLSUMClusteringS2S": 38.53, + "MLSUMClusteringS2S (ru)": 40.87, + "MasakhaNEWSClusteringP2P": 47.91, + "MasakhaNEWSClusteringP2P (amh)": 58.05, + "MasakhaNEWSClusteringP2P (eng)": 43.8, + "MasakhaNEWSClusteringP2P (fra)": 58.28, + "MasakhaNEWSClusteringP2P (hau)": 44.78, + "MasakhaNEWSClusteringP2P (ibo)": 44.97, + "MasakhaNEWSClusteringP2P (lin)": 48.08, + "MasakhaNEWSClusteringP2P (lug)": 50.15, + "MasakhaNEWSClusteringP2P (orm)": 38.02, + "MasakhaNEWSClusteringP2P (pcm)": 71.03, + "MasakhaNEWSClusteringP2P (run)": 58.28, + "MasakhaNEWSClusteringP2P (sna)": 59.25, + "MasakhaNEWSClusteringP2P (som)": 37.27, + "MasakhaNEWSClusteringP2P (swa)": 34.54, + "MasakhaNEWSClusteringP2P (tir)": 53.44, + "MasakhaNEWSClusteringP2P (xho)": 40.32, + "MasakhaNEWSClusteringP2P (yor)": 37.97, + "MasakhaNEWSClusteringS2S": 51.16, + "MasakhaNEWSClusteringS2S (amh)": 49.38, + "MasakhaNEWSClusteringS2S (eng)": 45.76, + "MasakhaNEWSClusteringS2S (fra)": 55.43, + "MasakhaNEWSClusteringS2S (hau)": 16.11, + "MasakhaNEWSClusteringS2S (ibo)": 24.38, + "MasakhaNEWSClusteringS2S (lin)": 44.8, + "MasakhaNEWSClusteringS2S (lug)": 45.67, + "MasakhaNEWSClusteringS2S (orm)": 26.41, + "MasakhaNEWSClusteringS2S (pcm)": 83.26, + "MasakhaNEWSClusteringS2S (run)": 48.77, + "MasakhaNEWSClusteringS2S (sna)": 43.9, + "MasakhaNEWSClusteringS2S (som)": 25.43, + "MasakhaNEWSClusteringS2S (swa)": 9.87, + "MasakhaNEWSClusteringS2S (tir)": 51.66, + "MasakhaNEWSClusteringS2S (xho)": 29.65, + "MasakhaNEWSClusteringS2S (yor)": 30.12, + "MedrxivClusteringP2P": 30.6, + "MedrxivClusteringS2S": 28.73, + "RedditClustering": 43.15, + "RedditClusteringP2P": 61.69, + "RuSciBenchGRNTIClusteringP2P": 51.56, + "RuSciBenchOECDClusteringP2P": 44.79, + "StackExchangeClustering": 55.31, + "StackExchangeClusteringP2P": 33.51, + "ThuNewsClusteringP2P": 40.98, + "ThuNewsClusteringS2S": 52.36, + "TwentyNewsgroupsClustering": 35.55 } ] }, "PairClassification": { "max_ap": [ { - "Model": "nomic-embed-text-v1.5-256", - "SprintDuplicateQuestions": 92.31, - "TwitterSemEval2015": 73.61, - "TwitterURLCorpus": 86.34 + "Model": "multilingual-e5-base", + "CDSC-E": 72.67, + "Cmnli": 74.51, + "Ocnli": 59.63, + "OpusparcusPC": 92.72, + "PPC": 88.01, + "PSC": 99.14, + "PawsXPairClassification": 56.93, + "SICK-E-PL": 68.77 }, { - "Model": "nomic-embed-text-v1.5-256", - "SprintDuplicateQuestions": 92.31, - "TwitterSemEval2015": 73.61, - "TwitterURLCorpus": 86.34 + "Model": "multilingual-e5-base", + "CDSC-E": 72.7, + "Cmnli": 74.98, + "Ocnli": 60.47, + "OpusparcusPC": 92.73, + "OpusparcusPC (de)": 95.83, + "OpusparcusPC (en)": 98.71, + "OpusparcusPC (fi)": 90.3, + "OpusparcusPC (fr)": 92.12, + "OpusparcusPC (ru)": 86.82, + "OpusparcusPC (sv)": 93.05, + "PPC": 88.01, + "PSC": 99.14, + "PawsXPairClassification": 56.93, + "PawsXPairClassification (de)": 54.26, + "PawsXPairClassification (en)": 55.79, + "PawsXPairClassification (es)": 54.13, + "PawsXPairClassification (fr)": 56.07, + "PawsXPairClassification (ja)": 49.15, + "PawsXPairClassification (ko)": 51.01, + "PawsXPairClassification (zh)": 55.13, + "SICK-E-PL": 68.76, + "SprintDuplicateQuestions": 93.02, + "TERRa": 54.98, + "TwitterSemEval2015": 72.21, + "TwitterURLCorpus": 85.48 + }, + { + "Model": "multilingual-e5-base", + "CDSC-E": 72.7, + "OpusparcusPC (de)": 95.83, + "OpusparcusPC (en)": 98.71, + "OpusparcusPC (fi)": 90.3, + "OpusparcusPC (fr)": 92.12, + "OpusparcusPC (ru)": 86.82, + "OpusparcusPC (sv)": 93.05, + "PSC": 99.14, + "PawsXPairClassification (de)": 54.11, + "PawsXPairClassification (en)": 55.79, + "PawsXPairClassification (es)": 54.13, + "PawsXPairClassification (fr)": 56.01, + "PawsXPairClassification (ja)": 49.02, + "PawsXPairClassification (ko)": 51.01, + "PawsXPairClassification (zh)": 55.13, + "SICK-E-PL": 68.76, + "SprintDuplicateQuestions": 93.02, + "TERRa": 54.96, + "TwitterSemEval2015": 72.21, + "TwitterURLCorpus": 85.48 } ] }, "Reranking": { "map": [ { - "Model": "nomic-embed-text-v1.5-256", - "AskUbuntuDupQuestions": 61.34, - "MindSmallReranking": 30.04, - "SciDocsRR": 79.4, - "StackOverflowDupQuestions": 49.95 + "Model": "multilingual-e5-base", + "AlloprofReranking": 65.9, + "AskUbuntuDupQuestions": 59.28, + "CMedQAv1": 65.21, + "CMedQAv2": 66.06, + "MMarcoReranking": 30.52, + "MindSmallReranking": 29.28, + "RuBQReranking": 72.01, + "SciDocsRR": 81.81, + "StackOverflowDupQuestions": 49.75, + "SyntecReranking": 85.31, + "T2Reranking": 64.86 + }, + { + "Model": "multilingual-e5-base", + "MIRACLReranking (ru)": 60.47 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "nomic-embed-text-v1.5-256", - "ArguAna": 45.44, - "CQADupstackRetrieval": 37.61, - "ClimateFEVER": 39.63, - "DBPedia": 39.42, - "FEVER": 84.4, - "FiQA2018": 35.0, - "HotpotQA": 67.78, - "MSMARCO": 41.38, - "NFCorpus": 32.54, - "NQ": 57.1, - "QuoraRetrieval": 87.65, - "SCIDOCS": 16.76, - "SciFact": 68.24, - "TRECCOVID": 80.65, - "Touche2020": 28.49 + "Model": "multilingual-e5-base", + "AILACasedocs": 26.05, + "AILAStatutes": 20.37, + "ARCChallenge": 9.61, + "AlloprofRetrieval": 34.45, + "AlphaNLI": 16.44, + "AppsRetrieval": 20.94, + "ArguAna": 44.21, + "ArguAna-PL": 42.86, + "BSARDRetrieval": 18.83, + "CmedqaRetrieval": 27.2, + "CodeFeedbackMT": 43.18, + "CodeFeedbackST": 72.61, + "CodeSearchNetCCRetrieval (python)": 85.37, + "CodeSearchNetCCRetrieval (javascript)": 78.44, + "CodeSearchNetCCRetrieval (go)": 72.29, + "CodeSearchNetCCRetrieval (ruby)": 81.92, + "CodeSearchNetCCRetrieval (java)": 78.85, + "CodeSearchNetCCRetrieval (php)": 72.02, + "CodeSearchNetRetrieval (python)": 85.68, + "CodeSearchNetRetrieval (javascript)": 71.4, + "CodeSearchNetRetrieval (go)": 89.47, + "CodeSearchNetRetrieval (ruby)": 78.24, + "CodeSearchNetRetrieval (java)": 78.39, + "CodeSearchNetRetrieval (php)": 81.05, + "CodeTransOceanContest": 51.89, + "CodeTransOceanDL": 29.88, + "CosQA": 31.12, + "CovidRetrieval": 73.48, + "DBPedia-PL": 30.23, + "DuRetrieval": 81.66, + "EcomRetrieval": 54.01, + "FiQA-PL": 25.59, + "FiQA2018": 38.15, + "GerDaLIRSmall": 15.3, + "HellaSwag": 24.79, + "HotpotQA-PL": 63.52, + "LEMBNarrativeQARetrieval": 23.6, + "LEMBQMSumRetrieval": 25.16, + "LEMBSummScreenFDRetrieval": 68.21, + "LEMBWikimQARetrieval": 56.04, + "LeCaRDv2": 59.0, + "LegalBenchConsumerContractsQA": 69.02, + "LegalBenchCorporateLobbying": 88.97, + "LegalQuAD": 47.85, + "LegalSummarization": 61.69, + "MIRACLRetrieval (ru)": 61.6, + "MMarcoRetrieval": 76.01, + "MSMARCO-PL": 29.52, + "MedicalRetrieval": 48.33, + "MintakaRetrieval": 23.46, + "MintakaRetrieval (ar)": 23.06, + "MintakaRetrieval (de)": 29.8, + "MintakaRetrieval (es)": 29.88, + "MintakaRetrieval (fr)": 30.96, + "MintakaRetrieval (hi)": 22.68, + "MintakaRetrieval (it)": 29.77, + "MintakaRetrieval (ja)": 22.98, + "MintakaRetrieval (pt)": 30.62, + "NFCorpus": 32.49, + "NFCorpus-PL": 25.99, + "NQ-PL": 44.8, + "PIQA": 25.09, + "Quail": 3.52, + "Quora-PL": 81.22, + "RARbCode": 52.16, + "RARbMath": 65.35, + "RiaNewsRetrieval": 70.24, + "RuBQRetrieval": 69.58, + "SCIDOCS": 17.17, + "SCIDOCS-PL": 12.36, + "SIQA": 3.72, + "SciFact": 69.39, + "SciFact-PL": 62.26, + "SpartQA": 7.91, + "StackOverflowQA": 85.11, + "SyntecRetrieval": 82.86, + "SyntheticText2SQL": 53.61, + "T2Retrieval": 70.77, + "TRECCOVID": 69.5, + "TRECCOVID-PL": 65.94, + "TempReasonL1": 0.72, + "TempReasonL2Fact": 38.76, + "TempReasonL2Pure": 1.63, + "TempReasonL3Fact": 35.85, + "TempReasonL3Pure": 7.11, + "Touche2020": 21.5, + "VideoRetrieval": 61.26, + "WinoGrande": 56.18, + "XPQARetrieval": 65.81, + "XPQARetrieval (ara-ara)": 39.97, + "XPQARetrieval (eng-ara)": 17.23, + "XPQARetrieval (ara-eng)": 34.35, + "XPQARetrieval (deu-deu)": 72.11, + "XPQARetrieval (eng-deu)": 28.91, + "XPQARetrieval (deu-eng)": 61.46, + "XPQARetrieval (spa-spa)": 58.35, + "XPQARetrieval (eng-spa)": 25.27, + "XPQARetrieval (spa-eng)": 51.07, + "XPQARetrieval (fra-fra)": 59.56, + "XPQARetrieval (eng-fra)": 23.69, + "XPQARetrieval (fra-eng)": 53.9, + "XPQARetrieval (hin-hin)": 70.59, + "XPQARetrieval (eng-hin)": 27.57, + "XPQARetrieval (hin-eng)": 63.69, + "XPQARetrieval (ita-ita)": 70.38, + "XPQARetrieval (eng-ita)": 26.06, + "XPQARetrieval (ita-eng)": 56.2, + "XPQARetrieval (jpn-jpn)": 71.97, + "XPQARetrieval (eng-jpn)": 17.63, + "XPQARetrieval (jpn-eng)": 61.03, + "XPQARetrieval (kor-kor)": 36.12, + "XPQARetrieval (eng-kor)": 20.15, + "XPQARetrieval (kor-eng)": 29.27, + "XPQARetrieval (pol-pol)": 48.05, + "XPQARetrieval (eng-pol)": 19.48, + "XPQARetrieval (pol-eng)": 40.18, + "XPQARetrieval (por-por)": 44.78, + "XPQARetrieval (eng-por)": 17.66, + "XPQARetrieval (por-eng)": 40.58, + "XPQARetrieval (tam-tam)": 35.21, + "XPQARetrieval (eng-tam)": 12.64, + "XPQARetrieval (tam-eng)": 26.73, + "XPQARetrieval (cmn-cmn)": 67.06, + "XPQARetrieval (eng-cmn)": 12.72, + "XPQARetrieval (cmn-eng)": 53.53 } ] }, "STS": { "cosine_spearman": [ { - "Model": "nomic-embed-text-v1.5-256", - "BIOSSES": 81.58, - "SICK-R": 79.24, - "STS12": 78.16, - "STS13": 86.01, - "STS14": 81.25, - "STS15": 86.51, - "STS16": 84.24, - "STS17 (en-en)": 86.44, - "STS22 (en)": 65.14, - "STSBenchmark": 84.8 + "Model": "multilingual-e5-base", + "AFQMC": 29.67, + "ATEC": 37.01, + "BQ": 45.45, + "CDSC-R": 90.08, + "LCQMC": 74.15, + "PAWSX": 12.14, + "QBQTC": 28.81, + "SICK-R-PL": 71.23, + "SICKFr": 76.23, + "STS22": 34.07, + "STSB": 79.05, + "STSBenchmarkMultilingualSTS": 80.62 + }, + { + "Model": "multilingual-e5-base", + "AFQMC": 29.66, + "ATEC": 37.01, + "BIOSSES": 85.05, + "BQ": 45.45, + "CDSC-R": 90.09, + "LCQMC": 74.15, + "PAWSX": 12.13, + "RUParaPhraserSTS": 70.17, + "RuSTSBenchmarkSTS": 79.64, + "SICK-R": 78.51, + "SICK-R-PL": 71.23, + "SICKFr": 75.76, + "STS12": 76.7, + "STS13": 78.02, + "STS14": 76.6, + "STS15": 88.16, + "STS16": 84.28, + "STS17 (en-de)": 82.08, + "STS17 (es-en)": 76.56, + "STS17 (fr-en)": 80.18, + "STS17 (en-tr)": 63.3, + "STS17 (ko-ko)": 79.95, + "STS17 (en-ar)": 71.27, + "STS17 (en-en)": 87.84, + "STS17 (es-es)": 86.74, + "STS17 (ar-ar)": 74.48, + "STS17 (it-en)": 80.16, + "STS17 (nl-en)": 79.29, + "STS22 (fr)": 75.04, + "STS22 (zh-en)": 69.8, + "STS22 (ar)": 57.82, + "STS22 (es-it)": 66.43, + "STS22 (it)": 77.76, + "STS22 (fr-pl)": 73.25, + "STS22 (de-en)": 54.89, + "STS22 (pl-en)": 70.37, + "STS22 (zh)": 65.63, + "STS22 (de)": 55.95, + "STS22 (pl)": 34.08, + "STS22 (de-fr)": 59.68, + "STS22 (es)": 66.67, + "STS22 (ru)": 60.67, + "STS22 (es-en)": 74.0, + "STS22 (en)": 62.26, + "STS22 (tr)": 63.71, + "STS22 (de-pl)": 39.35, + "STSB": 79.04, + "STSBenchmark": 85.64, + "STSBenchmarkMultilingualSTS (zh)": 79.87, + "STSBenchmarkMultilingualSTS (de)": 79.68, + "STSBenchmarkMultilingualSTS (es)": 81.75, + "STSBenchmarkMultilingualSTS (en)": 85.64, + "STSBenchmarkMultilingualSTS (fr)": 80.85, + "STSBenchmarkMultilingualSTS (pl)": 74.93, + "STSBenchmarkMultilingualSTS (pt)": 67.16, + "STSBenchmarkMultilingualSTS (nl)": 75.96, + "STSBenchmarkMultilingualSTS (it)": 78.09, + "STSBenchmarkMultilingualSTS (ru)": 79.33 + }, + { + "Model": "multilingual-e5-base", + "AFQMC": 29.66, + "ATEC": 37.01, + "BIOSSES": 85.05, + "BQ": 45.45, + "CDSC-R": 90.09, + "LCQMC": 74.15, + "PAWSX": 12.13, + "RUParaPhraserSTS": 70.17, + "RuSTSBenchmarkSTS": 79.64, + "SICK-R": 78.51, + "SICK-R-PL": 71.23, + "SICKFr": 75.76, + "STS12": 76.7, + "STS13": 78.02, + "STS14": 76.6, + "STS15": 88.16, + "STS16": 84.28, + "STS17 (en-de)": 82.08, + "STS17 (es-en)": 76.56, + "STS17 (fr-en)": 80.18, + "STS17 (en-tr)": 63.3, + "STS17 (ko-ko)": 79.95, + "STS17 (en-ar)": 71.27, + "STS17 (en-en)": 87.84, + "STS17 (es-es)": 86.74, + "STS17 (ar-ar)": 74.48, + "STS17 (it-en)": 80.16, + "STS17 (nl-en)": 79.29, + "STS22 (fr)": 75.04, + "STS22 (zh-en)": 69.8, + "STS22 (ar)": 57.82, + "STS22 (es-it)": 66.43, + "STS22 (it)": 77.76, + "STS22 (fr-pl)": 73.25, + "STS22 (de-en)": 54.89, + "STS22 (pl-en)": 70.37, + "STS22 (zh)": 65.63, + "STS22 (de)": 55.95, + "STS22 (pl)": 34.08, + "STS22 (de-fr)": 59.68, + "STS22 (es)": 66.67, + "STS22 (ru)": 60.67, + "STS22 (es-en)": 74.0, + "STS22 (en)": 62.26, + "STS22 (tr)": 63.71, + "STS22 (de-pl)": 39.35, + "STSB": 79.04, + "STSBenchmark": 85.64, + "STSBenchmarkMultilingualSTS (zh)": 79.87, + "STSBenchmarkMultilingualSTS (de)": 79.68, + "STSBenchmarkMultilingualSTS (es)": 81.75, + "STSBenchmarkMultilingualSTS (en)": 85.64, + "STSBenchmarkMultilingualSTS (fr)": 80.85, + "STSBenchmarkMultilingualSTS (pl)": 74.93, + "STSBenchmarkMultilingualSTS (pt)": 67.16, + "STSBenchmarkMultilingualSTS (nl)": 75.96, + "STSBenchmarkMultilingualSTS (it)": 78.09, + "STSBenchmarkMultilingualSTS (ru)": 79.33 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "nomic-embed-text-v1.5-256", - "SummEval": 30.05 + "Model": "multilingual-e5-base", + "SummEval": 30.23, + "SummEvalFr": 32.96 + }, + { + "Model": "multilingual-e5-base", + "SummEval": 30.23, + "SummEvalFr": 32.96 + }, + { + "Model": "multilingual-e5-base", + "SummEvalFr": 30.76 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "multilingual-e5-base", + "CEDRClassification": 42.32, + "SensitiveTopicsClassification": 24.98 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "multilingual-e5-base", + "Core17InstructionRetrieval": 0.61, + "News21InstructionRetrieval": -1.14, + "Robust04InstructionRetrieval": -7.43 + } + ] } }, - "sentence-t5-base": { + "multilingual-e5-large": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "multilingual-e5-large", + "BornholmBitextMining": 29.87, + "Tatoeba (bre-eng)": 11.1, + "Tatoeba (oci-eng)": 54.91, + "Tatoeba (orv-eng)": 39.87, + "Tatoeba (tur-eng)": 96.27, + "Tatoeba (afr-eng)": 90.22, + "Tatoeba (dtp-eng)": 7.03, + "Tatoeba (glg-eng)": 93.34, + "Tatoeba (sqi-eng)": 94.7, + "Tatoeba (gla-eng)": 59.0, + "Tatoeba (heb-eng)": 86.61, + "Tatoeba (mal-eng)": 97.7, + "Tatoeba (yid-eng)": 76.33, + "Tatoeba (nob-eng)": 97.2, + "Tatoeba (tha-eng)": 95.38, + "Tatoeba (ind-eng)": 92.9, + "Tatoeba (isl-eng)": 92.09, + "Tatoeba (ces-eng)": 94.89, + "Tatoeba (uig-eng)": 72.17, + "Tatoeba (rus-eng)": 92.32, + "Tatoeba (zsm-eng)": 94.53, + "Tatoeba (war-eng)": 62.02, + "Tatoeba (jpn-eng)": 95.28, + "Tatoeba (hin-eng)": 94.48, + "Tatoeba (mkd-eng)": 85.63, + "Tatoeba (vie-eng)": 97.0, + "Tatoeba (bos-eng)": 92.86, + "Tatoeba (arq-eng)": 41.56, + "Tatoeba (cha-eng)": 27.16, + "Tatoeba (ell-eng)": 93.88, + "Tatoeba (hye-eng)": 90.92, + "Tatoeba (kaz-eng)": 79.67, + "Tatoeba (xho-eng)": 80.87, + "Tatoeba (arz-eng)": 74.73, + "Tatoeba (fin-eng)": 95.44, + "Tatoeba (gle-eng)": 71.48, + "Tatoeba (ile-eng)": 79.16, + "Tatoeba (ber-eng)": 38.9, + "Tatoeba (mon-eng)": 87.53, + "Tatoeba (aze-eng)": 87.61, + "Tatoeba (srp-eng)": 93.1, + "Tatoeba (tzl-eng)": 53.16, + "Tatoeba (dsb-eng)": 48.44, + "Tatoeba (pol-eng)": 96.6, + "Tatoeba (eus-eng)": 77.82, + "Tatoeba (nov-eng)": 71.62, + "Tatoeba (tuk-eng)": 33.15, + "Tatoeba (ukr-eng)": 93.32, + "Tatoeba (est-eng)": 85.03, + "Tatoeba (deu-eng)": 99.07, + "Tatoeba (ido-eng)": 83.52, + "Tatoeba (cym-eng)": 76.21, + "Tatoeba (ara-eng)": 85.48, + "Tatoeba (csb-eng)": 36.98, + "Tatoeba (cbk-eng)": 69.26, + "Tatoeba (ben-eng)": 83.02, + "Tatoeba (slk-eng)": 93.13, + "Tatoeba (fao-eng)": 72.62, + "Tatoeba (pam-eng)": 9.32, + "Tatoeba (hsb-eng)": 58.9, + "Tatoeba (lat-eng)": 53.37, + "Tatoeba (nno-eng)": 91.4, + "Tatoeba (gsw-eng)": 51.65, + "Tatoeba (cat-eng)": 91.03, + "Tatoeba (urd-eng)": 89.21, + "Tatoeba (kzj-eng)": 7.91, + "Tatoeba (kor-eng)": 90.65, + "Tatoeba (slv-eng)": 89.57, + "Tatoeba (ast-eng)": 81.76, + "Tatoeba (cmn-eng)": 95.28, + "Tatoeba (cor-eng)": 6.28, + "Tatoeba (tel-eng)": 91.34, + "Tatoeba (kab-eng)": 36.54, + "Tatoeba (yue-eng)": 88.71, + "Tatoeba (swe-eng)": 95.3, + "Tatoeba (pes-eng)": 92.14, + "Tatoeba (hun-eng)": 94.01, + "Tatoeba (tgl-eng)": 92.0, + "Tatoeba (pms-eng)": 59.85, + "Tatoeba (lvs-eng)": 90.06, + "Tatoeba (swh-eng)": 71.61, + "Tatoeba (uzb-eng)": 72.35, + "Tatoeba (por-eng)": 93.63, + "Tatoeba (ron-eng)": 94.87, + "Tatoeba (nds-eng)": 69.28, + "Tatoeba (fry-eng)": 63.43, + "Tatoeba (khm-eng)": 59.96, + "Tatoeba (nld-eng)": 96.63, + "Tatoeba (lit-eng)": 88.48, + "Tatoeba (awa-eng)": 72.27, + "Tatoeba (amh-eng)": 80.69, + "Tatoeba (jav-eng)": 75.46, + "Tatoeba (mar-eng)": 88.58, + "Tatoeba (spa-eng)": 97.1, + "Tatoeba (lfn-eng)": 62.91, + "Tatoeba (ceb-eng)": 55.31, + "Tatoeba (bul-eng)": 92.93, + "Tatoeba (tat-eng)": 73.51, + "Tatoeba (kur-eng)": 66.83, + "Tatoeba (mhr-eng)": 6.79, + "Tatoeba (epo-eng)": 96.01, + "Tatoeba (kat-eng)": 84.09, + "Tatoeba (ina-eng)": 93.47, + "Tatoeba (tam-eng)": 88.23, + "Tatoeba (ita-eng)": 93.29, + "Tatoeba (hrv-eng)": 96.15, + "Tatoeba (fra-eng)": 93.42, + "Tatoeba (wuu-eng)": 86.37, + "Tatoeba (dan-eng)": 95.08, + "Tatoeba (max-eng)": 63.41, + "Tatoeba (ang-eng)": 40.18, + "Tatoeba (bel-eng)": 91.08, + "Tatoeba (swg-eng)": 55.64 + } + ] }, "Classification": { "accuracy": [ { - "Model": "sentence-t5-base", - "AmazonCounterfactualClassification (de)": 69.98, - "AmazonCounterfactualClassification (en)": 75.82, - "AmazonCounterfactualClassification (en-ext)": 76.81, - "AmazonCounterfactualClassification (ja)": 46.05, - "AmazonPolarityClassification": 85.12, - "AmazonReviewsClassification (de)": 37.9, - "AmazonReviewsClassification (en)": 44.94, - "AmazonReviewsClassification (es)": 37.33, - "AmazonReviewsClassification (fr)": 37.35, - "AmazonReviewsClassification (ja)": 22.29, - "AmazonReviewsClassification (zh)": 21.53, - "Banking77Classification": 76.48, - "EmotionClassification": 51.35, - "ImdbClassification": 77.34, - "MTOPDomainClassification (de)": 76.98, - "MTOPDomainClassification (en)": 90.34, - "MTOPDomainClassification (es)": 73.61, - "MTOPDomainClassification (fr)": 75.03, - "MTOPDomainClassification (hi)": 21.4, - "MTOPDomainClassification (th)": 16.21, - "MTOPIntentClassification (de)": 44.43, - "MTOPIntentClassification (en)": 63.32, - "MTOPIntentClassification (es)": 42.03, - "MTOPIntentClassification (fr)": 43.85, - "MTOPIntentClassification (hi)": 3.8, - "MTOPIntentClassification (th)": 5.21, - "MasakhaNEWSClassification (fra)": 81.21, - "MassiveIntentClassification (af)": 34.32, - "MassiveIntentClassification (am)": 2.38, - "MassiveIntentClassification (ar)": 4.53, - "MassiveIntentClassification (az)": 31.76, - "MassiveIntentClassification (bn)": 2.58, - "MassiveIntentClassification (cy)": 28.94, - "MassiveIntentClassification (da)": 38.82, - "MassiveIntentClassification (de)": 45.23, - "MassiveIntentClassification (el)": 10.05, - "MassiveIntentClassification (en)": 69.74, - "MassiveIntentClassification (es)": 45.32, - "MassiveIntentClassification (fa)": 3.58, - "MassiveIntentClassification (fi)": 33.52, - "MassiveIntentClassification (fr)": 51.13, - "MassiveIntentClassification (he)": 2.63, - "MassiveIntentClassification (hi)": 2.68, - "MassiveIntentClassification (hu)": 32.31, - "MassiveIntentClassification (hy)": 3.33, - "MassiveIntentClassification (id)": 35.5, - "MassiveIntentClassification (is)": 29.82, - "MassiveIntentClassification (it)": 45.59, - "MassiveIntentClassification (ja)": 3.67, - "MassiveIntentClassification (jv)": 31.15, - "MassiveIntentClassification (ka)": 2.77, - "MassiveIntentClassification (km)": 5.66, - "MassiveIntentClassification (kn)": 2.59, - "MassiveIntentClassification (ko)": 2.34, - "MassiveIntentClassification (lv)": 33.97, - "MassiveIntentClassification (ml)": 2.55, - "MassiveIntentClassification (mn)": 14.7, - "MassiveIntentClassification (ms)": 33.12, - "MassiveIntentClassification (my)": 4.42, - "MassiveIntentClassification (nb)": 38.53, - "MassiveIntentClassification (nl)": 37.96, - "MassiveIntentClassification (pl)": 34.41, - "MassiveIntentClassification (pt)": 43.35, - "MassiveIntentClassification (ro)": 42.69, - "MassiveIntentClassification (ru)": 14.82, - "MassiveIntentClassification (sl)": 34.54, - "MassiveIntentClassification (sq)": 38.54, - "MassiveIntentClassification (sv)": 35.98, - "MassiveIntentClassification (sw)": 32.14, - "MassiveIntentClassification (ta)": 1.41, - "MassiveIntentClassification (te)": 2.5, - "MassiveIntentClassification (th)": 3.71, - "MassiveIntentClassification (tl)": 36.04, - "MassiveIntentClassification (tr)": 33.77, - "MassiveIntentClassification (ur)": 2.99, - "MassiveIntentClassification (vi)": 22.62, - "MassiveIntentClassification (zh-CN)": 1.12, - "MassiveIntentClassification (zh-TW)": 4.63, - "MassiveScenarioClassification (af)": 44.45, - "MassiveScenarioClassification (am)": 7.51, - "MassiveScenarioClassification (ar)": 12.32, - "MassiveScenarioClassification (az)": 38.41, - "MassiveScenarioClassification (bn)": 8.45, - "MassiveScenarioClassification (cy)": 35.04, - "MassiveScenarioClassification (da)": 48.36, - "MassiveScenarioClassification (de)": 59.12, - "MassiveScenarioClassification (el)": 17.68, - "MassiveScenarioClassification (en)": 72.32, - "MassiveScenarioClassification (es)": 55.61, - "MassiveScenarioClassification (fa)": 6.86, - "MassiveScenarioClassification (fi)": 41.34, - "MassiveScenarioClassification (fr)": 59.92, - "MassiveScenarioClassification (he)": 7.86, - "MassiveScenarioClassification (hi)": 7.63, - "MassiveScenarioClassification (hu)": 41.31, - "MassiveScenarioClassification (hy)": 9.23, - "MassiveScenarioClassification (id)": 44.64, - "MassiveScenarioClassification (is)": 39.63, - "MassiveScenarioClassification (it)": 54.58, - "MassiveScenarioClassification (ja)": 4.96, - "MassiveScenarioClassification (jv)": 40.73, - "MassiveScenarioClassification (ka)": 7.51, - "MassiveScenarioClassification (km)": 8.73, - "MassiveScenarioClassification (kn)": 7.99, - "MassiveScenarioClassification (ko)": 6.03, - "MassiveScenarioClassification (lv)": 36.42, - "MassiveScenarioClassification (ml)": 6.96, - "MassiveScenarioClassification (mn)": 19.85, - "MassiveScenarioClassification (ms)": 43.18, - "MassiveScenarioClassification (my)": 9.46, - "MassiveScenarioClassification (nb)": 46.6, - "MassiveScenarioClassification (nl)": 50.0, - "MassiveScenarioClassification (pl)": 42.3, - "MassiveScenarioClassification (pt)": 52.24, - "MassiveScenarioClassification (ro)": 53.7, - "MassiveScenarioClassification (ru)": 20.69, - "MassiveScenarioClassification (sl)": 39.79, - "MassiveScenarioClassification (sq)": 50.16, - "MassiveScenarioClassification (sv)": 46.69, - "MassiveScenarioClassification (sw)": 40.48, - "MassiveScenarioClassification (ta)": 7.47, - "MassiveScenarioClassification (te)": 6.87, - "MassiveScenarioClassification (th)": 8.26, - "MassiveScenarioClassification (tl)": 48.94, - "MassiveScenarioClassification (tr)": 41.83, - "MassiveScenarioClassification (ur)": 9.77, - "MassiveScenarioClassification (vi)": 30.01, - "MassiveScenarioClassification (zh-CN)": 4.17, - "MassiveScenarioClassification (zh-TW)": 7.91, - "ToxicConversationsClassification": 68.2, - "TweetSentimentExtractionClassification": 62.71 + "Model": "multilingual-e5-large", + "AllegroReviews": 41.04, + "AmazonCounterfactualClassification (en-ext)": 78.73, + "AmazonCounterfactualClassification (en)": 78.67, + "AmazonCounterfactualClassification (de)": 68.66, + "AmazonCounterfactualClassification (ja)": 78.8, + "AmazonPolarityClassification": 93.26, + "AmazonReviewsClassification": 41.91, + "AmazonReviewsClassification (en)": 49.2, + "AmazonReviewsClassification (de)": 46.5, + "AmazonReviewsClassification (es)": 44.35, + "AmazonReviewsClassification (fr)": 42.55, + "AmazonReviewsClassification (ja)": 41.71, + "AmazonReviewsClassification (zh)": 38.87, + "AngryTweetsClassification": 57.69, + "Banking77Classification": 75.88, + "CBD": 69.84, + "DKHateClassification": 66.02, + "DanishPoliticalCommentsClassification": 39.43, + "EmotionClassification": 47.58, + "GeoreviewClassification": 49.69, + "HeadlineClassification": 77.19, + "IFlyTek": 41.86, + "ImdbClassification": 90.23, + "InappropriatenessClassification": 61.59, + "JDReview": 80.54, + "KinopoiskClassification": 56.59, + "LccSentimentClassification": 61.53, + "MTOPDomainClassification": 86.41, + "MTOPDomainClassification (en)": 91.81, + "MTOPDomainClassification (de)": 90.44, + "MTOPDomainClassification (es)": 88.34, + "MTOPDomainClassification (fr)": 86.23, + "MTOPDomainClassification (hi)": 86.84, + "MTOPDomainClassification (th)": 86.88, + "MTOPIntentClassification": 59.43, + "MTOPIntentClassification (en)": 64.29, + "MTOPIntentClassification (de)": 65.97, + "MTOPIntentClassification (es)": 61.9, + "MTOPIntentClassification (fr)": 56.25, + "MTOPIntentClassification (hi)": 59.17, + "MTOPIntentClassification (th)": 62.59, + "MasakhaNEWSClassification": 79.38, + "MasakhaNEWSClassification (amh)": 83.7, + "MasakhaNEWSClassification (eng)": 78.26, + "MasakhaNEWSClassification (fra)": 76.11, + "MasakhaNEWSClassification (hau)": 76.17, + "MasakhaNEWSClassification (ibo)": 70.05, + "MasakhaNEWSClassification (lin)": 75.89, + "MasakhaNEWSClassification (lug)": 73.63, + "MasakhaNEWSClassification (orm)": 80.31, + "MasakhaNEWSClassification (pcm)": 89.15, + "MasakhaNEWSClassification (run)": 76.55, + "MasakhaNEWSClassification (sna)": 86.99, + "MasakhaNEWSClassification (som)": 64.63, + "MasakhaNEWSClassification (swa)": 73.42, + "MasakhaNEWSClassification (tir)": 72.06, + "MasakhaNEWSClassification (xho)": 82.56, + "MasakhaNEWSClassification (yor)": 81.09, + "MassiveIntentClassification": 65.07, + "MassiveIntentClassification (he)": 62.44, + "MassiveIntentClassification (id)": 63.51, + "MassiveIntentClassification (fi)": 64.28, + "MassiveIntentClassification (hu)": 64.0, + "MassiveIntentClassification (nb)": 64.54, + "MassiveIntentClassification (vi)": 63.39, + "MassiveIntentClassification (ko)": 63.92, + "MassiveIntentClassification (ta)": 53.41, + "MassiveIntentClassification (te)": 53.96, + "MassiveIntentClassification (da)": 63.7, + "MassiveIntentClassification (ar)": 54.1, + "MassiveIntentClassification (en)": 68.51, + "MassiveIntentClassification (hi)": 60.93, + "MassiveIntentClassification (bn)": 55.6, + "MassiveIntentClassification (tr)": 64.61, + "MassiveIntentClassification (am)": 45.48, + "MassiveIntentClassification (es)": 64.01, + "MassiveIntentClassification (lv)": 58.31, + "MassiveIntentClassification (my)": 49.73, + "MassiveIntentClassification (sq)": 57.3, + "MassiveIntentClassification (th)": 62.75, + "MassiveIntentClassification (sl)": 59.38, + "MassiveIntentClassification (ml)": 57.58, + "MassiveIntentClassification (is)": 53.3, + "MassiveIntentClassification (ms)": 58.49, + "MassiveIntentClassification (nl)": 65.0, + "MassiveIntentClassification (az)": 54.68, + "MassiveIntentClassification (ru)": 65.76, + "MassiveIntentClassification (sv)": 66.52, + "MassiveIntentClassification (ro)": 59.76, + "MassiveIntentClassification (zh-TW)": 58.78, + "MassiveIntentClassification (jv)": 48.96, + "MassiveIntentClassification (fa)": 63.74, + "MassiveIntentClassification (pl)": 65.09, + "MassiveIntentClassification (km)": 34.88, + "MassiveIntentClassification (ja)": 67.11, + "MassiveIntentClassification (kn)": 53.45, + "MassiveIntentClassification (fr)": 63.37, + "MassiveIntentClassification (ka)": 41.45, + "MassiveIntentClassification (sw)": 47.69, + "MassiveIntentClassification (zh-CN)": 66.23, + "MassiveIntentClassification (ur)": 54.6, + "MassiveIntentClassification (tl)": 54.77, + "MassiveIntentClassification (cy)": 44.22, + "MassiveIntentClassification (de)": 63.82, + "MassiveIntentClassification (af)": 53.69, + "MassiveIntentClassification (it)": 63.89, + "MassiveIntentClassification (el)": 64.34, + "MassiveIntentClassification (mn)": 49.6, + "MassiveIntentClassification (hy)": 50.89, + "MassiveIntentClassification (pt)": 65.6, + "MassiveScenarioClassification": 69.82, + "MassiveScenarioClassification (en)": 73.04, + "MassiveScenarioClassification (ta)": 58.76, + "MassiveScenarioClassification (ml)": 63.17, + "MassiveScenarioClassification (pt)": 68.33, + "MassiveScenarioClassification (he)": 67.72, + "MassiveScenarioClassification (ar)": 61.0, + "MassiveScenarioClassification (pl)": 69.83, + "MassiveScenarioClassification (vi)": 68.91, + "MassiveScenarioClassification (ms)": 63.55, + "MassiveScenarioClassification (sl)": 65.33, + "MassiveScenarioClassification (hu)": 70.53, + "MassiveScenarioClassification (my)": 54.03, + "MassiveScenarioClassification (sq)": 63.79, + "MassiveScenarioClassification (fi)": 68.62, + "MassiveScenarioClassification (te)": 59.49, + "MassiveScenarioClassification (ru)": 70.85, + "MassiveScenarioClassification (am)": 52.69, + "MassiveScenarioClassification (hi)": 66.85, + "MassiveScenarioClassification (fr)": 68.74, + "MassiveScenarioClassification (cy)": 51.25, + "MassiveScenarioClassification (tr)": 68.12, + "MassiveScenarioClassification (ro)": 66.06, + "MassiveScenarioClassification (zh-CN)": 72.25, + "MassiveScenarioClassification (kn)": 59.36, + "MassiveScenarioClassification (nb)": 70.44, + "MassiveScenarioClassification (de)": 71.25, + "MassiveScenarioClassification (el)": 69.74, + "MassiveScenarioClassification (es)": 69.07, + "MassiveScenarioClassification (da)": 71.18, + "MassiveScenarioClassification (ur)": 60.89, + "MassiveScenarioClassification (af)": 62.35, + "MassiveScenarioClassification (ko)": 70.54, + "MassiveScenarioClassification (bn)": 61.85, + "MassiveScenarioClassification (ja)": 73.16, + "MassiveScenarioClassification (az)": 58.49, + "MassiveScenarioClassification (fa)": 67.55, + "MassiveScenarioClassification (sv)": 72.77, + "MassiveScenarioClassification (zh-TW)": 64.35, + "MassiveScenarioClassification (ka)": 47.82, + "MassiveScenarioClassification (nl)": 71.11, + "MassiveScenarioClassification (sw)": 56.27, + "MassiveScenarioClassification (hy)": 55.76, + "MassiveScenarioClassification (is)": 60.74, + "MassiveScenarioClassification (mn)": 55.37, + "MassiveScenarioClassification (it)": 69.45, + "MassiveScenarioClassification (tl)": 60.71, + "MassiveScenarioClassification (km)": 41.14, + "MassiveScenarioClassification (th)": 69.06, + "MassiveScenarioClassification (lv)": 64.28, + "MassiveScenarioClassification (jv)": 56.24, + "MassiveScenarioClassification (id)": 69.43, + "MultilingualSentiment": 70.81, + "NoRecClassification": 58.43, + "NordicLangClassification": 80.15, + "NorwegianParliament": 60.36, + "OnlineShopping": 90.45, + "PAC": 70.33, + "PolEmo2.0-IN": 77.06, + "PolEmo2.0-OUT": 53.48, + "RuReviewsClassification": 65.28, + "RuSciBenchGRNTIClassification": 58.2, + "RuSciBenchOECDClassification": 43.91, + "ScalaDaClassification": 50.77, + "ScalaNbClassification": 50.44, + "TNews": 48.8, + "ToxicConversationsClassification": 66.01, + "TweetSentimentExtractionClassification": 62.8, + "Waimai": 86.3 } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-t5-base", - "AlloProfClusteringP2P": 58.44, - "AlloProfClusteringS2S": 35.93, - "ArxivClusteringP2P": 39.28, - "ArxivClusteringS2S": 27.26, - "BiorxivClusteringP2P": 33.99, - "BiorxivClusteringS2S": 22.92, - "BlurbsClusteringP2P": 30.59, - "BlurbsClusteringS2S": 11.57, - "HALClusteringS2S": 17.72, - "MLSUMClusteringP2P": 40.77, - "MLSUMClusteringS2S": 30.06, - "MasakhaNEWSClusteringP2P (fra)": 61.9, - "MasakhaNEWSClusteringS2S (fra)": 35.64, - "MedrxivClusteringP2P": 33.2, - "MedrxivClusteringS2S": 26.13, - "RedditClustering": 52.93, - "RedditClusteringP2P": 59.67, - "StackExchangeClustering": 63.13, - "StackExchangeClusteringP2P": 35.68, - "TenKGnadClusteringP2P": 44.88, - "TenKGnadClusteringS2S": 18.11, - "TwentyNewsgroupsClustering": 48.1 + "Model": "multilingual-e5-large", + "8TagsClustering": 33.88, + "AlloProfClusteringP2P": 62.99, + "AlloProfClusteringS2S": 32.26, + "BiorxivClusteringP2P": 35.5, + "BiorxivClusteringS2S": 33.3, + "CLSClusteringP2P": 40.68, + "CLSClusteringS2S": 38.59, + "GeoreviewClusteringP2P": 59.59, + "HALClusteringS2S": 22.44, + "MLSUMClusteringP2P": 44.04, + "MLSUMClusteringP2P (ru)": 42.79, + "MLSUMClusteringS2S": 37.65, + "MLSUMClusteringS2S (ru)": 44.32, + "MasakhaNEWSClusteringP2P": 40.94, + "MasakhaNEWSClusteringP2P (amh)": 67.16, + "MasakhaNEWSClusteringP2P (eng)": 61.1, + "MasakhaNEWSClusteringP2P (fra)": 41.66, + "MasakhaNEWSClusteringP2P (hau)": 60.7, + "MasakhaNEWSClusteringP2P (ibo)": 48.41, + "MasakhaNEWSClusteringP2P (lin)": 57.69, + "MasakhaNEWSClusteringP2P (lug)": 71.95, + "MasakhaNEWSClusteringP2P (orm)": 60.14, + "MasakhaNEWSClusteringP2P (pcm)": 80.84, + "MasakhaNEWSClusteringP2P (run)": 59.91, + "MasakhaNEWSClusteringP2P (sna)": 53.3, + "MasakhaNEWSClusteringP2P (som)": 34.38, + "MasakhaNEWSClusteringP2P (swa)": 33.25, + "MasakhaNEWSClusteringP2P (tir)": 54.21, + "MasakhaNEWSClusteringP2P (xho)": 41.12, + "MasakhaNEWSClusteringP2P (yor)": 36.22, + "MasakhaNEWSClusteringS2S": 30.56, + "MasakhaNEWSClusteringS2S (amh)": 47.24, + "MasakhaNEWSClusteringS2S (eng)": 53.93, + "MasakhaNEWSClusteringS2S (fra)": 39.84, + "MasakhaNEWSClusteringS2S (hau)": 19.24, + "MasakhaNEWSClusteringS2S (ibo)": 28.88, + "MasakhaNEWSClusteringS2S (lin)": 42.22, + "MasakhaNEWSClusteringS2S (lug)": 43.63, + "MasakhaNEWSClusteringS2S (orm)": 26.29, + "MasakhaNEWSClusteringS2S (pcm)": 59.77, + "MasakhaNEWSClusteringS2S (run)": 51.46, + "MasakhaNEWSClusteringS2S (sna)": 48.14, + "MasakhaNEWSClusteringS2S (som)": 25.14, + "MasakhaNEWSClusteringS2S (swa)": 7.28, + "MasakhaNEWSClusteringS2S (tir)": 50.51, + "MasakhaNEWSClusteringS2S (xho)": 30.98, + "MasakhaNEWSClusteringS2S (yor)": 34.09, + "MedrxivClusteringP2P": 31.7, + "MedrxivClusteringS2S": 29.76, + "RedditClustering": 46.91, + "RedditClusteringP2P": 63.0, + "RuSciBenchGRNTIClusteringP2P": 51.98, + "RuSciBenchOECDClusteringP2P": 45.12, + "StackExchangeClustering": 58.37, + "StackExchangeClusteringP2P": 32.9, + "ThuNewsClusteringP2P": 58.05, + "ThuNewsClusteringS2S": 55.59, + "TwentyNewsgroupsClustering": 39.4 } ] }, "PairClassification": { "max_ap": [ { - "Model": "sentence-t5-base", - "OpusparcusPC (fr)": 89.4, - "PawsXPairClassification (fr)": 55.35, - "SprintDuplicateQuestions": 91.23, - "TwitterSemEval2015": 78.25, - "TwitterURLCorpus": 86.05 + "Model": "multilingual-e5-large", + "CDSC-E": 74.47, + "Cmnli": 78.18, + "Ocnli": 61.6, + "OpusparcusPC": 93.89, + "PPC": 92.18, + "PSC": 99.39, + "PawsXPairClassification": 58.5, + "SICK-E-PL": 75.96 }, { - "Model": "sentence-t5-base", - "OpusparcusPC (fr)": 89.41, - "PawsXPairClassification (fr)": 55.43, - "SprintDuplicateQuestions": 91.23, - "TwitterSemEval2015": 78.25, - "TwitterURLCorpus": 86.05 + "Model": "multilingual-e5-large", + "CDSC-E": 74.47, + "Cmnli": 78.18, + "Ocnli": 61.6, + "OpusparcusPC": 93.89, + "OpusparcusPC (de)": 97.27, + "OpusparcusPC (en)": 98.74, + "OpusparcusPC (fi)": 94.26, + "OpusparcusPC (fr)": 93.68, + "OpusparcusPC (ru)": 89.64, + "OpusparcusPC (sv)": 94.98, + "PPC": 92.18, + "PSC": 99.4, + "PawsXPairClassification": 58.61, + "PawsXPairClassification (de)": 57.14, + "PawsXPairClassification (en)": 62.97, + "PawsXPairClassification (es)": 56.87, + "PawsXPairClassification (fr)": 58.69, + "PawsXPairClassification (ja)": 50.84, + "PawsXPairClassification (ko)": 52.22, + "PawsXPairClassification (zh)": 56.95, + "SICK-E-PL": 75.95, + "SprintDuplicateQuestions": 93.14, + "TERRa": 58.42, + "TwitterSemEval2015": 75.28, + "TwitterURLCorpus": 85.83 + }, + { + "Model": "multilingual-e5-large", + "CDSC-E": 74.47, + "OpusparcusPC (de)": 97.27, + "OpusparcusPC (en)": 98.74, + "OpusparcusPC (fi)": 94.26, + "OpusparcusPC (fr)": 93.68, + "OpusparcusPC (ru)": 89.64, + "OpusparcusPC (sv)": 94.98, + "PSC": 99.4, + "PawsXPairClassification (de)": 56.81, + "PawsXPairClassification (en)": 62.97, + "PawsXPairClassification (es)": 56.85, + "PawsXPairClassification (fr)": 58.68, + "PawsXPairClassification (ja)": 50.7, + "PawsXPairClassification (ko)": 52.08, + "PawsXPairClassification (zh)": 56.82, + "SICK-E-PL": 75.95, + "SprintDuplicateQuestions": 93.14, + "TERRa": 58.4, + "TwitterSemEval2015": 75.28, + "TwitterURLCorpus": 85.83 } ] }, "Reranking": { "map": [ { - "Model": "sentence-t5-base", - "AlloprofReranking": 50.12, - "AskUbuntuDupQuestions": 59.73, - "MindSmallReranking": 30.2, - "SciDocsRR": 73.96, - "StackOverflowDupQuestions": 48.46, - "SyntecReranking": 78.05 + "Model": "multilingual-e5-large", + "AlloprofReranking": 69.44, + "AskUbuntuDupQuestions": 59.24, + "CMedQAv1": 68.25, + "CMedQAv2": 68.56, + "MMarcoReranking": 29.12, + "MindSmallReranking": 30.24, + "RuBQReranking": 75.6, + "SciDocsRR": 84.22, + "StackOverflowDupQuestions": 50.14, + "SyntecReranking": 85.45, + "T2Reranking": 66.32 + }, + { + "Model": "multilingual-e5-large", + "MIRACLReranking (ru)": 63.71 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-t5-base", - "AlloprofRetrieval": 27.52, - "ArguAna": 44.85, - "BSARDRetrieval": 0.16, - "CQADupstackRetrieval": 35.23, - "ClimateFEVER": 10.37, - "DBPedia": 27.77, - "FEVER": 26.17, - "FiQA2018": 34.83, - "HotpotQA": 33.2, - "MSMARCO": 20.7, - "MintakaRetrieval (fr)": 21.04, - "NFCorpus": 28.65, - "NQ": 36.32, - "QuoraRetrieval": 85.49, - "SCIDOCS": 14.15, - "SciFact": 45.76, - "SyntecRetrieval": 67.0, - "TRECCOVID": 40.7, - "Touche2020": 20.3, - "XPQARetrieval (fr)": 45.19 + "Model": "multilingual-e5-large", + "AILACasedocs": 26.43, + "AILAStatutes": 20.84, + "ARCChallenge": 10.83, + "AlloprofRetrieval": 39.34, + "AlphaNLI": 13.59, + "AppsRetrieval": 32.55, + "ArguAna": 54.36, + "ArguAna-PL": 52.99, + "BSARDRetrieval": 21.28, + "CmedqaRetrieval": 28.66, + "CodeFeedbackMT": 42.78, + "CodeFeedbackST": 74.26, + "CodeSearchNetCCRetrieval (python)": 84.45, + "CodeSearchNetCCRetrieval (javascript)": 77.67, + "CodeSearchNetCCRetrieval (go)": 72.08, + "CodeSearchNetCCRetrieval (ruby)": 81.94, + "CodeSearchNetCCRetrieval (java)": 78.65, + "CodeSearchNetCCRetrieval (php)": 72.21, + "CodeSearchNetRetrieval (python)": 89.42, + "CodeSearchNetRetrieval (javascript)": 75.54, + "CodeSearchNetRetrieval (go)": 91.8, + "CodeSearchNetRetrieval (ruby)": 81.43, + "CodeSearchNetRetrieval (java)": 82.05, + "CodeSearchNetRetrieval (php)": 84.5, + "CodeTransOceanContest": 74.03, + "CodeTransOceanDL": 31.28, + "CosQA": 34.8, + "CovidRetrieval": 75.61, + "DBPedia-PL": 35.82, + "DuRetrieval": 85.3, + "EcomRetrieval": 54.67, + "FiQA-PL": 32.97, + "FiQA2018": 43.81, + "GerDaLIRSmall": 15.72, + "HellaSwag": 27.35, + "HotpotQA-PL": 67.41, + "LEMBNarrativeQARetrieval": 24.22, + "LEMBQMSumRetrieval": 24.26, + "LEMBSummScreenFDRetrieval": 71.12, + "LEMBWikimQARetrieval": 56.8, + "LeCaRDv2": 55.83, + "LegalBenchConsumerContractsQA": 73.3, + "LegalBenchCorporateLobbying": 89.72, + "LegalQuAD": 43.17, + "LegalSummarization": 62.1, + "MIRACLRetrieval (ru)": 67.33, + "MMarcoRetrieval": 79.2, + "MSMARCO-PL": 33.38, + "MedicalRetrieval": 51.44, + "MintakaRetrieval": 25.2, + "MintakaRetrieval (ar)": 26.5, + "MintakaRetrieval (de)": 32.77, + "MintakaRetrieval (es)": 34.23, + "MintakaRetrieval (fr)": 34.24, + "MintakaRetrieval (hi)": 27.45, + "MintakaRetrieval (it)": 33.84, + "MintakaRetrieval (ja)": 26.45, + "MintakaRetrieval (pt)": 35.9, + "NFCorpus": 33.95, + "NFCorpus-PL": 30.21, + "NQ-PL": 52.79, + "PIQA": 28.82, + "Quail": 4.85, + "Quora-PL": 83.65, + "RARbCode": 58.92, + "RARbMath": 67.32, + "RiaNewsRetrieval": 80.67, + "RuBQRetrieval": 74.13, + "SCIDOCS": 17.45, + "SCIDOCS-PL": 13.82, + "SIQA": 5.36, + "SciFact": 70.42, + "SciFact-PL": 65.66, + "SpartQA": 5.64, + "StackOverflowQA": 88.89, + "SyntecRetrieval": 82.39, + "SyntheticText2SQL": 53.07, + "T2Retrieval": 76.07, + "TRECCOVID": 71.21, + "TRECCOVID-PL": 69.9, + "TempReasonL1": 1.14, + "TempReasonL2Fact": 42.96, + "TempReasonL2Pure": 2.05, + "TempReasonL3Fact": 38.22, + "TempReasonL3Pure": 8.31, + "Touche2020": 23.13, + "VideoRetrieval": 58.28, + "WinoGrande": 54.99, + "XPQARetrieval": 66.15, + "XPQARetrieval (ara-ara)": 43.69, + "XPQARetrieval (eng-ara)": 30.86, + "XPQARetrieval (ara-eng)": 39.11, + "XPQARetrieval (deu-deu)": 76.83, + "XPQARetrieval (eng-deu)": 42.87, + "XPQARetrieval (deu-eng)": 68.25, + "XPQARetrieval (spa-spa)": 61.77, + "XPQARetrieval (eng-spa)": 37.55, + "XPQARetrieval (spa-eng)": 52.86, + "XPQARetrieval (fra-fra)": 61.38, + "XPQARetrieval (eng-fra)": 39.12, + "XPQARetrieval (fra-eng)": 57.93, + "XPQARetrieval (hin-hin)": 71.07, + "XPQARetrieval (eng-hin)": 32.39, + "XPQARetrieval (hin-eng)": 68.31, + "XPQARetrieval (ita-ita)": 74.32, + "XPQARetrieval (eng-ita)": 37.95, + "XPQARetrieval (ita-eng)": 64.54, + "XPQARetrieval (jpn-jpn)": 74.11, + "XPQARetrieval (eng-jpn)": 38.31, + "XPQARetrieval (jpn-eng)": 65.42, + "XPQARetrieval (kor-kor)": 35.71, + "XPQARetrieval (eng-kor)": 31.09, + "XPQARetrieval (kor-eng)": 34.02, + "XPQARetrieval (pol-pol)": 51.01, + "XPQARetrieval (eng-pol)": 30.49, + "XPQARetrieval (pol-eng)": 44.66, + "XPQARetrieval (por-por)": 41.1, + "XPQARetrieval (eng-por)": 22.03, + "XPQARetrieval (por-eng)": 35.15, + "XPQARetrieval (tam-tam)": 39.47, + "XPQARetrieval (eng-tam)": 17.33, + "XPQARetrieval (tam-eng)": 33.67, + "XPQARetrieval (cmn-cmn)": 66.27, + "XPQARetrieval (eng-cmn)": 26.24, + "XPQARetrieval (cmn-eng)": 55.15 } ] }, "STS": { "cosine_spearman": [ { - "Model": "sentence-t5-base", - "BIOSSES": 75.89, - "SICK-R": 80.18, - "SICKFr": 71.74, - "STS12": 78.05, - "STS13": 85.85, - "STS14": 82.19, - "STS15": 87.46, - "STS16": 84.03, - "STS17 (ar-ar)": 13.36, - "STS17 (en-ar)": -5.65, - "STS17 (en-de)": 67.11, - "STS17 (en-en)": 89.57, - "STS17 (en-tr)": -0.02, - "STS17 (es-en)": 47.72, - "STS17 (es-es)": 79.94, - "STS17 (fr-en)": 56.61, - "STS17 (it-en)": 30.46, - "STS17 (ko-ko)": 10.06, - "STS17 (nl-en)": 36.46, - "STS22 (ar)": 31.2, - "STS22 (de)": 42.08, - "STS22 (de-en)": 46.9, - "STS22 (de-fr)": 55.04, - "STS22 (de-pl)": 33.94, - "STS22 (en)": 62.66, - "STS22 (es)": 53.81, - "STS22 (es-en)": 65.19, - "STS22 (es-it)": 55.29, - "STS22 (fr)": 77.69, - "STS22 (fr-pl)": 28.17, - "STS22 (it)": 60.65, - "STS22 (pl)": 24.42, - "STS22 (pl-en)": 42.97, - "STS22 (ru)": 12.13, - "STS22 (tr)": 40.45, - "STS22 (zh)": 32.9, - "STS22 (zh-en)": 20.15, - "STSBenchmark": 85.52, - "STSBenchmarkMultilingualSTS (fr)": 74.04 + "Model": "multilingual-e5-large", + "AFQMC": 33.02, + "ATEC": 39.81, + "BQ": 46.44, + "CDSC-R": 91.0, + "LCQMC": 75.95, + "PAWSX": 14.63, + "QBQTC": 29.77, + "SICK-R-PL": 75.08, + "SICKFr": 78.78, + "STS22": 34.66, + "STSB": 81.08, + "STSBenchmarkMultilingualSTS": 82.53 + }, + { + "Model": "multilingual-e5-large", + "AFQMC": 33.01, + "ATEC": 39.8, + "BIOSSES": 82.49, + "BQ": 46.44, + "CDSC-R": 91.0, + "LCQMC": 75.95, + "PAWSX": 14.63, + "RUParaPhraserSTS": 71.82, + "RuSTSBenchmarkSTS": 83.15, + "SICK-R": 80.23, + "SICK-R-PL": 75.08, + "SICKFr": 78.81, + "STS12": 80.02, + "STS13": 81.55, + "STS14": 77.72, + "STS15": 89.31, + "STS16": 85.79, + "STS17 (fr-en)": 85.62, + "STS17 (nl-en)": 85.29, + "STS17 (es-es)": 86.71, + "STS17 (ar-ar)": 77.83, + "STS17 (es-en)": 80.74, + "STS17 (en-en)": 88.12, + "STS17 (ko-ko)": 82.27, + "STS17 (en-ar)": 75.03, + "STS17 (en-tr)": 71.22, + "STS17 (en-de)": 86.15, + "STS17 (it-en)": 84.52, + "STS22 (de)": 56.58, + "STS22 (de-fr)": 67.96, + "STS22 (ar)": 56.95, + "STS22 (ru)": 59.89, + "STS22 (de-en)": 56.59, + "STS22 (en)": 63.66, + "STS22 (tr)": 63.56, + "STS22 (pl-en)": 65.54, + "STS22 (zh)": 66.82, + "STS22 (it)": 76.99, + "STS22 (pl)": 34.65, + "STS22 (es-it)": 68.92, + "STS22 (fr)": 76.77, + "STS22 (zh-en)": 65.95, + "STS22 (fr-pl)": 50.71, + "STS22 (es-en)": 72.51, + "STS22 (es)": 64.6, + "STS22 (de-pl)": 49.58, + "STSB": 81.08, + "STSBenchmark": 87.29, + "STSBenchmarkMultilingualSTS (pl)": 81.06, + "STSBenchmarkMultilingualSTS (ru)": 83.05, + "STSBenchmarkMultilingualSTS (de)": 84.27, + "STSBenchmarkMultilingualSTS (pt)": 73.31, + "STSBenchmarkMultilingualSTS (en)": 87.29, + "STSBenchmarkMultilingualSTS (es)": 83.81, + "STSBenchmarkMultilingualSTS (zh)": 81.22, + "STSBenchmarkMultilingualSTS (it)": 81.75, + "STSBenchmarkMultilingualSTS (nl)": 81.63, + "STSBenchmarkMultilingualSTS (fr)": 83.28 + }, + { + "Model": "multilingual-e5-large", + "AFQMC": 33.01, + "ATEC": 39.8, + "BIOSSES": 82.49, + "BQ": 46.44, + "CDSC-R": 91.0, + "LCQMC": 75.95, + "PAWSX": 14.63, + "RUParaPhraserSTS": 71.82, + "RuSTSBenchmarkSTS": 83.15, + "SICK-R": 80.23, + "SICK-R-PL": 75.08, + "SICKFr": 78.81, + "STS12": 80.02, + "STS13": 81.55, + "STS14": 77.72, + "STS15": 89.31, + "STS16": 85.79, + "STS17 (fr-en)": 85.62, + "STS17 (nl-en)": 85.29, + "STS17 (es-es)": 86.71, + "STS17 (ar-ar)": 77.83, + "STS17 (es-en)": 80.74, + "STS17 (en-en)": 88.12, + "STS17 (ko-ko)": 82.27, + "STS17 (en-ar)": 75.03, + "STS17 (en-tr)": 71.22, + "STS17 (en-de)": 86.15, + "STS17 (it-en)": 84.52, + "STS22 (de)": 56.58, + "STS22 (de-fr)": 67.96, + "STS22 (ar)": 56.95, + "STS22 (ru)": 59.89, + "STS22 (de-en)": 56.59, + "STS22 (en)": 63.66, + "STS22 (tr)": 63.56, + "STS22 (pl-en)": 65.54, + "STS22 (zh)": 66.82, + "STS22 (it)": 76.99, + "STS22 (pl)": 34.65, + "STS22 (es-it)": 68.92, + "STS22 (fr)": 76.77, + "STS22 (zh-en)": 65.95, + "STS22 (fr-pl)": 50.71, + "STS22 (es-en)": 72.51, + "STS22 (es)": 64.6, + "STS22 (de-pl)": 49.58, + "STSB": 81.08, + "STSBenchmark": 87.29, + "STSBenchmarkMultilingualSTS (pl)": 81.06, + "STSBenchmarkMultilingualSTS (ru)": 83.05, + "STSBenchmarkMultilingualSTS (de)": 84.27, + "STSBenchmarkMultilingualSTS (pt)": 73.31, + "STSBenchmarkMultilingualSTS (en)": 87.29, + "STSBenchmarkMultilingualSTS (es)": 83.81, + "STSBenchmarkMultilingualSTS (zh)": 81.22, + "STSBenchmarkMultilingualSTS (it)": 81.75, + "STSBenchmarkMultilingualSTS (nl)": 81.63, + "STSBenchmarkMultilingualSTS (fr)": 83.28 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "sentence-t5-base", - "SummEval": 31.39, - "SummEvalFr": 30.01 + "Model": "multilingual-e5-large", + "SummEval": 29.64, + "SummEvalFr": 30.92 + }, + { + "Model": "multilingual-e5-large", + "SummEval": 29.64, + "SummEvalFr": 30.92 + }, + { + "Model": "multilingual-e5-large", + "SummEvalFr": 30.92 + } + ] + }, + "MultilabelClassification": { + "accuracy": [ + { + "Model": "multilingual-e5-large", + "CEDRClassification": 44.84, + "SensitiveTopicsClassification": 27.17 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "multilingual-e5-large", + "Core17InstructionRetrieval": -1.62, + "News21InstructionRetrieval": -0.06, + "Robust04InstructionRetrieval": -7.48 } ] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] } }, - "udever-bloom-560m": { + "multilingual-e5-small": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "multilingual-e5-small", + "BornholmBitextMining": 43.89, + "Tatoeba (dan-eng)": 86.38, + "Tatoeba (aze-eng)": 80.79, + "Tatoeba (tur-eng)": 88.42, + "Tatoeba (ces-eng)": 80.99, + "Tatoeba (swh-eng)": 65.43, + "Tatoeba (hrv-eng)": 84.42, + "Tatoeba (est-eng)": 56.47, + "Tatoeba (bel-eng)": 80.89, + "Tatoeba (mhr-eng)": 5.58, + "Tatoeba (cat-eng)": 79.3, + "Tatoeba (ben-eng)": 81.4, + "Tatoeba (tgl-eng)": 77.54, + "Tatoeba (cym-eng)": 62.3, + "Tatoeba (tha-eng)": 90.88, + "Tatoeba (swe-eng)": 87.46, + "Tatoeba (oci-eng)": 38.27, + "Tatoeba (gsw-eng)": 40.13, + "Tatoeba (ceb-eng)": 42.35, + "Tatoeba (ind-eng)": 88.28, + "Tatoeba (ron-eng)": 85.68, + "Tatoeba (gla-eng)": 35.96, + "Tatoeba (ile-eng)": 70.31, + "Tatoeba (orv-eng)": 14.89, + "Tatoeba (vie-eng)": 89.03, + "Tatoeba (slv-eng)": 73.93, + "Tatoeba (tel-eng)": 86.82, + "Tatoeba (cmn-eng)": 89.85, + "Tatoeba (fao-eng)": 56.57, + "Tatoeba (glg-eng)": 79.65, + "Tatoeba (tuk-eng)": 16.99, + "Tatoeba (war-eng)": 39.14, + "Tatoeba (ita-eng)": 88.54, + "Tatoeba (epo-eng)": 88.96, + "Tatoeba (fra-eng)": 90.51, + "Tatoeba (pol-eng)": 88.85, + "Tatoeba (khm-eng)": 44.34, + "Tatoeba (fin-eng)": 70.23, + "Tatoeba (zsm-eng)": 91.37, + "Tatoeba (bre-eng)": 7.09, + "Tatoeba (kur-eng)": 39.99, + "Tatoeba (yid-eng)": 65.9, + "Tatoeba (kaz-eng)": 70.57, + "Tatoeba (cbk-eng)": 55.36, + "Tatoeba (mkd-eng)": 63.74, + "Tatoeba (hsb-eng)": 36.49, + "Tatoeba (deu-eng)": 97.22, + "Tatoeba (isl-eng)": 62.32, + "Tatoeba (kzj-eng)": 6.56, + "Tatoeba (lat-eng)": 37.76, + "Tatoeba (rus-eng)": 89.77, + "Tatoeba (ber-eng)": 18.22, + "Tatoeba (nld-eng)": 91.87, + "Tatoeba (lit-eng)": 59.95, + "Tatoeba (uig-eng)": 60.59, + "Tatoeba (kab-eng)": 18.06, + "Tatoeba (nov-eng)": 64.2, + "Tatoeba (heb-eng)": 73.68, + "Tatoeba (eus-eng)": 50.9, + "Tatoeba (kat-eng)": 77.6, + "Tatoeba (xho-eng)": 63.2, + "Tatoeba (yue-eng)": 69.33, + "Tatoeba (ina-eng)": 86.39, + "Tatoeba (bos-eng)": 81.15, + "Tatoeba (lfn-eng)": 51.46, + "Tatoeba (nno-eng)": 70.29, + "Tatoeba (urd-eng)": 85.07, + "Tatoeba (arq-eng)": 23.62, + "Tatoeba (tam-eng)": 82.82, + "Tatoeba (ang-eng)": 30.3, + "Tatoeba (swg-eng)": 44.0, + "Tatoeba (csb-eng)": 26.23, + "Tatoeba (wuu-eng)": 67.3, + "Tatoeba (max-eng)": 48.29, + "Tatoeba (tzl-eng)": 34.83, + "Tatoeba (uzb-eng)": 59.11, + "Tatoeba (amh-eng)": 74.11, + "Tatoeba (fry-eng)": 49.05, + "Tatoeba (mar-eng)": 85.94, + "Tatoeba (pes-eng)": 85.51, + "Tatoeba (por-eng)": 89.63, + "Tatoeba (hin-eng)": 92.36, + "Tatoeba (tat-eng)": 66.8, + "Tatoeba (sqi-eng)": 86.21, + "Tatoeba (cor-eng)": 5.24, + "Tatoeba (slk-eng)": 79.86, + "Tatoeba (dtp-eng)": 6.42, + "Tatoeba (jpn-eng)": 77.43, + "Tatoeba (ell-eng)": 86.81, + "Tatoeba (lvs-eng)": 61.84, + "Tatoeba (cha-eng)": 24.88, + "Tatoeba (arz-eng)": 53.35, + "Tatoeba (dsb-eng)": 29.87, + "Tatoeba (kor-eng)": 73.74, + "Tatoeba (srp-eng)": 83.06, + "Tatoeba (mal-eng)": 94.78, + "Tatoeba (hye-eng)": 83.81, + "Tatoeba (spa-eng)": 93.01, + "Tatoeba (ara-eng)": 76.09, + "Tatoeba (bul-eng)": 85.47, + "Tatoeba (jav-eng)": 53.39, + "Tatoeba (ukr-eng)": 82.98, + "Tatoeba (awa-eng)": 74.55, + "Tatoeba (nob-eng)": 90.18, + "Tatoeba (ido-eng)": 70.07, + "Tatoeba (hun-eng)": 74.44, + "Tatoeba (nds-eng)": 52.46, + "Tatoeba (pam-eng)": 5.76, + "Tatoeba (ast-eng)": 62.81, + "Tatoeba (pms-eng)": 35.47, + "Tatoeba (afr-eng)": 85.17, + "Tatoeba (gle-eng)": 56.32, + "Tatoeba (mon-eng)": 77.7 + } + ] }, "Classification": { "accuracy": [ { - "Model": "udever-bloom-560m", - "AmazonReviewsClassification (fr)": 26.85, - "MTOPDomainClassification (fr)": 34.99, - "MTOPIntentClassification (fr)": 15.76, - "MasakhaNEWSClassification (fra)": 67.94, - "MassiveIntentClassification (fr)": 15.09, - "MassiveScenarioClassification (fr)": 21.67 + "Model": "multilingual-e5-small", + "AllegroReviews": 37.42, + "AmazonCounterfactualClassification (en-ext)": 73.07, + "AmazonCounterfactualClassification (en)": 71.87, + "AmazonCounterfactualClassification (de)": 71.72, + "AmazonCounterfactualClassification (ja)": 61.46, + "AmazonPolarityClassification": 88.61, + "AmazonReviewsClassification (en)": 45.75, + "AmazonReviewsClassification (de)": 41.07, + "AmazonReviewsClassification (es)": 41.37, + "AmazonReviewsClassification (fr)": 39.47, + "AmazonReviewsClassification (ja)": 38.55, + "AmazonReviewsClassification (zh)": 38.31, + "AmazonReviewsClassification": 39.68, + "AngryTweetsClassification": 53.57, + "Banking77Classification": 70.44, + "CBD": 63.25, + "DKHateClassification": 60.73, + "DanishPoliticalCommentsClassification": 34.38, + "EmotionClassification": 42.86, + "GeoreviewClassification": 44.66, + "HeadlineClassification": 73.94, + "IFlyTek": 47.35, + "ImdbClassification": 79.57, + "InappropriatenessClassification": 59.16, + "JDReview": 79.34, + "KinopoiskClassification": 49.96, + "LccSentimentClassification": 57.87, + "MTOPDomainClassification (en)": 88.99, + "MTOPDomainClassification (de)": 86.15, + "MTOPDomainClassification (es)": 85.53, + "MTOPDomainClassification (fr)": 81.5, + "MTOPDomainClassification (hi)": 84.07, + "MTOPDomainClassification (th)": 83.16, + "MTOPDomainClassification": 81.2, + "MTOPIntentClassification (en)": 56.69, + "MTOPIntentClassification (de)": 55.88, + "MTOPIntentClassification (es)": 53.15, + "MTOPIntentClassification (fr)": 44.35, + "MTOPIntentClassification (hi)": 52.26, + "MTOPIntentClassification (th)": 54.61, + "MTOPIntentClassification": 46.01, + "MasakhaNEWSClassification (amh)": 84.28, + "MasakhaNEWSClassification (eng)": 75.61, + "MasakhaNEWSClassification (fra)": 74.67, + "MasakhaNEWSClassification (hau)": 73.08, + "MasakhaNEWSClassification (ibo)": 63.9, + "MasakhaNEWSClassification (lin)": 73.37, + "MasakhaNEWSClassification (lug)": 67.89, + "MasakhaNEWSClassification (orm)": 68.77, + "MasakhaNEWSClassification (pcm)": 90.79, + "MasakhaNEWSClassification (run)": 75.4, + "MasakhaNEWSClassification (sna)": 82.76, + "MasakhaNEWSClassification (som)": 59.8, + "MasakhaNEWSClassification (swa)": 69.85, + "MasakhaNEWSClassification (tir)": 68.01, + "MasakhaNEWSClassification (xho)": 72.22, + "MasakhaNEWSClassification (yor)": 73.84, + "MasakhaNEWSClassification": 77.65, + "MassiveIntentClassification (is)": 41.53, + "MassiveIntentClassification (tl)": 48.7, + "MassiveIntentClassification (he)": 51.11, + "MassiveIntentClassification (ta)": 47.65, + "MassiveIntentClassification (ar)": 47.78, + "MassiveIntentClassification (my)": 45.64, + "MassiveIntentClassification (sl)": 47.71, + "MassiveIntentClassification (af)": 48.74, + "MassiveIntentClassification (de)": 55.52, + "MassiveIntentClassification (pl)": 57.33, + "MassiveIntentClassification (en)": 63.87, + "MassiveIntentClassification (fi)": 55.14, + "MassiveIntentClassification (lv)": 44.93, + "MassiveIntentClassification (fr)": 57.9, + "MassiveIntentClassification (ur)": 50.51, + "MassiveIntentClassification (mn)": 47.38, + "MassiveIntentClassification (it)": 58.8, + "MassiveIntentClassification (ko)": 57.12, + "MassiveIntentClassification (nb)": 55.36, + "MassiveIntentClassification (es)": 59.19, + "MassiveIntentClassification (ja)": 61.58, + "MassiveIntentClassification (da)": 56.12, + "MassiveIntentClassification (zh-TW)": 53.75, + "MassiveIntentClassification (id)": 56.2, + "MassiveIntentClassification (ka)": 39.52, + "MassiveIntentClassification (hi)": 55.69, + "MassiveIntentClassification (cy)": 36.62, + "MassiveIntentClassification (kn)": 47.85, + "MassiveIntentClassification (pt)": 60.12, + "MassiveIntentClassification (th)": 56.26, + "MassiveIntentClassification (fa)": 57.73, + "MassiveIntentClassification (bn)": 50.68, + "MassiveIntentClassification (ml)": 52.81, + "MassiveIntentClassification (ro)": 52.82, + "MassiveIntentClassification (am)": 43.52, + "MassiveIntentClassification (hu)": 53.21, + "MassiveIntentClassification (sw)": 44.84, + "MassiveIntentClassification (ms)": 50.8, + "MassiveIntentClassification (tr)": 56.88, + "MassiveIntentClassification (km)": 33.45, + "MassiveIntentClassification (ru)": 58.43, + "MassiveIntentClassification (az)": 49.32, + "MassiveIntentClassification (te)": 48.85, + "MassiveIntentClassification (nl)": 59.27, + "MassiveIntentClassification (zh-CN)": 62.04, + "MassiveIntentClassification (sq)": 48.68, + "MassiveIntentClassification (vi)": 56.19, + "MassiveIntentClassification (jv)": 42.96, + "MassiveIntentClassification (sv)": 58.2, + "MassiveIntentClassification (hy)": 47.89, + "MassiveIntentClassification (el)": 54.14, + "MassiveIntentClassification": 57.4, + "MassiveScenarioClassification (de)": 65.88, + "MassiveScenarioClassification (nb)": 61.96, + "MassiveScenarioClassification (th)": 65.72, + "MassiveScenarioClassification (ka)": 44.96, + "MassiveScenarioClassification (jv)": 51.39, + "MassiveScenarioClassification (sv)": 67.33, + "MassiveScenarioClassification (fr)": 63.9, + "MassiveScenarioClassification (tl)": 55.3, + "MassiveScenarioClassification (hu)": 61.93, + "MassiveScenarioClassification (ur)": 55.91, + "MassiveScenarioClassification (ms)": 59.18, + "MassiveScenarioClassification (az)": 53.27, + "MassiveScenarioClassification (af)": 58.0, + "MassiveScenarioClassification (zh-TW)": 61.15, + "MassiveScenarioClassification (lv)": 51.0, + "MassiveScenarioClassification (km)": 39.01, + "MassiveScenarioClassification (el)": 62.29, + "MassiveScenarioClassification (bn)": 57.38, + "MassiveScenarioClassification (da)": 64.03, + "MassiveScenarioClassification (ml)": 60.31, + "MassiveScenarioClassification (ro)": 60.0, + "MassiveScenarioClassification (ru)": 63.89, + "MassiveScenarioClassification (it)": 64.03, + "MassiveScenarioClassification (am)": 50.53, + "MassiveScenarioClassification (is)": 49.66, + "MassiveScenarioClassification (ja)": 67.75, + "MassiveScenarioClassification (zh-CN)": 68.96, + "MassiveScenarioClassification (id)": 62.0, + "MassiveScenarioClassification (tr)": 62.14, + "MassiveScenarioClassification (fa)": 63.32, + "MassiveScenarioClassification (ta)": 52.74, + "MassiveScenarioClassification (kn)": 52.73, + "MassiveScenarioClassification (pt)": 62.75, + "MassiveScenarioClassification (cy)": 44.63, + "MassiveScenarioClassification (my)": 51.07, + "MassiveScenarioClassification (es)": 64.43, + "MassiveScenarioClassification (hi)": 62.22, + "MassiveScenarioClassification (te)": 54.86, + "MassiveScenarioClassification (mn)": 52.41, + "MassiveScenarioClassification (ko)": 65.7, + "MassiveScenarioClassification (sl)": 54.05, + "MassiveScenarioClassification (sw)": 52.42, + "MassiveScenarioClassification (hy)": 52.93, + "MassiveScenarioClassification (nl)": 67.01, + "MassiveScenarioClassification (sq)": 56.15, + "MassiveScenarioClassification (fi)": 61.89, + "MassiveScenarioClassification (en)": 69.28, + "MassiveScenarioClassification (vi)": 62.67, + "MassiveScenarioClassification (he)": 59.22, + "MassiveScenarioClassification (ar)": 54.56, + "MassiveScenarioClassification (pl)": 64.27, + "MassiveScenarioClassification": 64.25, + "MultilingualSentiment": 64.74, + "NoRecClassification": 53.96, + "NordicLangClassification": 75.15, + "NorwegianParliament": 60.15, + "OnlineShopping": 88.73, + "PAC": 70.55, + "PolEmo2.0-IN": 67.35, + "PolEmo2.0-OUT": 39.13, + "RuReviewsClassification": 61.18, + "RuSciBenchGRNTIClassification": 54.99, + "RuSciBenchOECDClassification": 41.72, + "ScalaDaClassification": 50.3, + "ScalaNbClassification": 50.06, + "TNews": 48.38, + "ToxicConversationsClassification": 63.59, + "TweetSentimentExtractionClassification": 62.79, + "Waimai": 83.9 } ] }, "Clustering": { "v_measure": [ { - "Model": "udever-bloom-560m", - "AlloProfClusteringP2P": 53.57, - "AlloProfClusteringS2S": 22.13, - "HALClusteringS2S": 7.68, - "MLSUMClusteringP2P": 36.43, - "MLSUMClusteringS2S": 25.26, - "MasakhaNEWSClusteringP2P (fra)": 37.57, - "MasakhaNEWSClusteringS2S (fra)": 20.58 + "Model": "multilingual-e5-small", + "8TagsClustering": 23.92, + "AlloProfClusteringP2P": 60.89, + "AlloProfClusteringS2S": 32.52, + "BiorxivClusteringP2P": 35.84, + "BiorxivClusteringS2S": 27.35, + "CLSClusteringP2P": 39.14, + "CLSClusteringS2S": 37.79, + "GeoreviewClusteringP2P": 58.57, + "HALClusteringS2S": 18.95, + "MLSUMClusteringP2P (ru)": 39.69, + "MLSUMClusteringP2P": 43.2, + "MLSUMClusteringS2S (ru)": 39.9, + "MLSUMClusteringS2S": 37.61, + "MasakhaNEWSClusteringP2P (amh)": 66.2, + "MasakhaNEWSClusteringP2P (eng)": 50.08, + "MasakhaNEWSClusteringP2P (fra)": 56.32, + "MasakhaNEWSClusteringP2P (hau)": 53.63, + "MasakhaNEWSClusteringP2P (ibo)": 49.19, + "MasakhaNEWSClusteringP2P (lin)": 55.06, + "MasakhaNEWSClusteringP2P (lug)": 59.97, + "MasakhaNEWSClusteringP2P (orm)": 32.72, + "MasakhaNEWSClusteringP2P (pcm)": 62.22, + "MasakhaNEWSClusteringP2P (run)": 57.52, + "MasakhaNEWSClusteringP2P (sna)": 45.11, + "MasakhaNEWSClusteringP2P (som)": 42.39, + "MasakhaNEWSClusteringP2P (swa)": 23.77, + "MasakhaNEWSClusteringP2P (tir)": 57.68, + "MasakhaNEWSClusteringP2P (xho)": 39.96, + "MasakhaNEWSClusteringP2P (yor)": 26.56, + "MasakhaNEWSClusteringP2P": 40.12, + "MasakhaNEWSClusteringS2S (amh)": 55.48, + "MasakhaNEWSClusteringS2S (eng)": 37.79, + "MasakhaNEWSClusteringS2S (fra)": 35.8, + "MasakhaNEWSClusteringS2S (hau)": 20.22, + "MasakhaNEWSClusteringS2S (ibo)": 35.67, + "MasakhaNEWSClusteringS2S (lin)": 41.12, + "MasakhaNEWSClusteringS2S (lug)": 48.63, + "MasakhaNEWSClusteringS2S (orm)": 29.16, + "MasakhaNEWSClusteringS2S (pcm)": 65.36, + "MasakhaNEWSClusteringS2S (run)": 45.5, + "MasakhaNEWSClusteringS2S (sna)": 47.61, + "MasakhaNEWSClusteringS2S (som)": 28.59, + "MasakhaNEWSClusteringS2S (swa)": 13.91, + "MasakhaNEWSClusteringS2S (tir)": 50.51, + "MasakhaNEWSClusteringS2S (xho)": 37.26, + "MasakhaNEWSClusteringS2S (yor)": 23.38, + "MasakhaNEWSClusteringS2S": 39.22, + "MedrxivClusteringP2P": 30.72, + "MedrxivClusteringS2S": 27.0, + "RedditClustering": 40.12, + "RedditClusteringP2P": 59.49, + "RuSciBenchGRNTIClusteringP2P": 51.14, + "RuSciBenchOECDClusteringP2P": 44.33, + "StackExchangeClustering": 53.32, + "StackExchangeClusteringP2P": 31.87, + "ThuNewsClusteringP2P": 55.18, + "ThuNewsClusteringS2S": 48.93, + "TwentyNewsgroupsClustering": 33.67 } ] }, "PairClassification": { "max_ap": [ { - "Model": "udever-bloom-560m", - "OpusparcusPC (fr)": 82.1, - "PawsXPairClassification (fr)": 59.69 + "Model": "multilingual-e5-small", + "CDSC-E": 69.69, + "OpusparcusPC (de)": 94.9, + "OpusparcusPC (en)": 98.42, + "OpusparcusPC (fi)": 88.29, + "OpusparcusPC (fr)": 91.77, + "OpusparcusPC (ru)": 84.79, + "OpusparcusPC (sv)": 91.07, + "PSC": 99.23, + "PawsXPairClassification (de)": 52.13, + "PawsXPairClassification (en)": 53.91, + "PawsXPairClassification (es)": 51.39, + "PawsXPairClassification (fr)": 52.69, + "PawsXPairClassification (ja)": 48.24, + "PawsXPairClassification (ko)": 49.95, + "PawsXPairClassification (zh)": 54.01, + "SICK-E-PL": 66.35, + "SprintDuplicateQuestions": 92.18, + "TERRa": 55.14, + "TwitterSemEval2015": 70.75, + "TwitterURLCorpus": 85.03 }, { - "Model": "udever-bloom-560m", - "OpusparcusPC (fr)": 85.87, - "PawsXPairClassification (fr)": 61.99 + "Model": "multilingual-e5-small", + "CDSC-E": 69.84, + "Cmnli": 72.12, + "Ocnli": 60.77, + "OpusparcusPC (de)": 94.9, + "OpusparcusPC (en)": 98.42, + "OpusparcusPC (fi)": 88.29, + "OpusparcusPC (fr)": 91.77, + "OpusparcusPC (ru)": 84.79, + "OpusparcusPC (sv)": 91.07, + "OpusparcusPC": 92.52, + "PPC": 86.79, + "PSC": 99.24, + "PawsXPairClassification (de)": 52.19, + "PawsXPairClassification (en)": 54.01, + "PawsXPairClassification (es)": 51.46, + "PawsXPairClassification (fr)": 52.77, + "PawsXPairClassification (ja)": 48.41, + "PawsXPairClassification (ko)": 49.98, + "PawsXPairClassification (zh)": 54.03, + "PawsXPairClassification": 55.72, + "SICK-E-PL": 66.34, + "SprintDuplicateQuestions": 92.42, + "TERRa": 55.14, + "TwitterSemEval2015": 70.75, + "TwitterURLCorpus": 85.03 + }, + { + "Model": "multilingual-e5-small", + "CDSC-E": 69.7, + "Cmnli": 72.12, + "Ocnli": 60.77, + "OpusparcusPC": 92.52, + "PPC": 86.72, + "PSC": 99.24, + "PawsXPairClassification": 55.68, + "SICK-E-PL": 66.34 } ] }, "Reranking": { "map": [ { - "Model": "udever-bloom-560m", - "AlloprofReranking": 28.75, - "SyntecReranking": 50.88 + "Model": "multilingual-e5-small", + "AlloprofReranking": 56.17, + "AskUbuntuDupQuestions": 56.42, + "CMedQAv1": 63.44, + "CMedQAv2": 62.41, + "MMarcoReranking": 24.33, + "MindSmallReranking": 29.96, + "RuBQReranking": 71.45, + "SciDocsRR": 78.26, + "StackOverflowDupQuestions": 46.97, + "SyntecReranking": 86.7, + "T2Reranking": 65.24 + }, + { + "Model": "multilingual-e5-small", + "MIRACLReranking (ru)": 59.12 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "udever-bloom-560m", - "AlloprofRetrieval": 1.98, + "Model": "multilingual-e5-small", + "AILACasedocs": 23.43, + "AILAStatutes": 19.01, + "ARCChallenge": 7.14, + "AlloprofRetrieval": 27.01, + "AlphaNLI": 13.0, + "AppsRetrieval": 12.01, + "ArguAna": 39.09, + "ArguAna-PL": 37.43, "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 0.48, - "SyntecRetrieval": 24.45, - "XPQARetrieval (fr)": 12.98 + "CmedqaRetrieval": 24.38, + "CodeFeedbackMT": 41.32, + "CodeFeedbackST": 72.67, + "CodeSearchNetCCRetrieval (python)": 80.75, + "CodeSearchNetCCRetrieval (javascript)": 77.6, + "CodeSearchNetCCRetrieval (go)": 69.66, + "CodeSearchNetCCRetrieval (ruby)": 72.75, + "CodeSearchNetCCRetrieval (java)": 75.58, + "CodeSearchNetCCRetrieval (php)": 69.38, + "CodeSearchNetRetrieval (python)": 86.31, + "CodeSearchNetRetrieval (javascript)": 69.53, + "CodeSearchNetRetrieval (go)": 90.3, + "CodeSearchNetRetrieval (ruby)": 76.58, + "CodeSearchNetRetrieval (java)": 74.96, + "CodeSearchNetRetrieval (php)": 80.28, + "CodeTransOceanContest": 69.76, + "CodeTransOceanDL": 32.3, + "CosQA": 29.61, + "CovidRetrieval": 72.82, + "DBPedia-PL": 29.27, + "DuRetrieval": 81.35, + "EcomRetrieval": 53.56, + "FiQA-PL": 22.03, + "FiQA2018": 33.13, + "GerDaLIRSmall": 14.81, + "HellaSwag": 23.73, + "HotpotQA-PL": 60.15, + "LEMBNarrativeQARetrieval": 22.6, + "LEMBQMSumRetrieval": 21.51, + "LEMBSummScreenFDRetrieval": 62.75, + "LEMBWikimQARetrieval": 57.13, + "LeCaRDv2": 61.58, + "LegalBenchConsumerContractsQA": 66.98, + "LegalBenchCorporateLobbying": 89.47, + "LegalQuAD": 47.8, + "LegalSummarization": 55.76, + "MIRACLRetrieval (ru)": 59.01, + "MMarcoRetrieval": 73.17, + "MSMARCO-PL": 26.94, + "MedicalRetrieval": 44.84, + "MintakaRetrieval (ar)": 21.22, + "MintakaRetrieval (de)": 25.6, + "MintakaRetrieval (es)": 26.4, + "MintakaRetrieval (fr)": 25.0, + "MintakaRetrieval (hi)": 21.1, + "MintakaRetrieval (it)": 26.25, + "MintakaRetrieval (ja)": 20.69, + "MintakaRetrieval (pt)": 24.44, + "MintakaRetrieval": 22.53, + "NFCorpus": 31.0, + "NFCorpus-PL": 26.48, + "NQ-PL": 40.46, + "PIQA": 21.08, + "Quail": 2.38, + "Quora-PL": 78.7, + "RARbCode": 46.96, + "RARbMath": 63.91, + "RiaNewsRetrieval": 70.0, + "RuBQRetrieval": 68.53, + "SCIDOCS": 13.9, + "SCIDOCS-PL": 11.6, + "SIQA": 2.57, + "SciFact": 67.7, + "SciFact-PL": 62.76, + "SpartQA": 5.43, + "StackOverflowQA": 81.94, + "SyntecRetrieval": 75.76, + "SyntheticText2SQL": 46.29, + "T2Retrieval": 71.39, + "TRECCOVID": 72.57, + "TRECCOVID-PL": 70.92, + "TempReasonL1": 0.8, + "TempReasonL2Fact": 36.76, + "TempReasonL2Pure": 0.62, + "TempReasonL3Fact": 32.42, + "TempReasonL3Pure": 6.36, + "Touche2020": 21.16, + "VideoRetrieval": 58.09, + "WinoGrande": 37.46, + "XPQARetrieval (ara-ara)": 39.93, + "XPQARetrieval (eng-ara)": 18.09, + "XPQARetrieval (ara-eng)": 31.64, + "XPQARetrieval (deu-deu)": 69.43, + "XPQARetrieval (eng-deu)": 25.14, + "XPQARetrieval (deu-eng)": 52.36, + "XPQARetrieval (spa-spa)": 55.71, + "XPQARetrieval (eng-spa)": 22.5, + "XPQARetrieval (spa-eng)": 42.4, + "XPQARetrieval (fra-fra)": 57.17, + "XPQARetrieval (eng-fra)": 27.69, + "XPQARetrieval (fra-eng)": 47.46, + "XPQARetrieval (hin-hin)": 68.15, + "XPQARetrieval (eng-hin)": 25.82, + "XPQARetrieval (hin-eng)": 63.79, + "XPQARetrieval (ita-ita)": 67.71, + "XPQARetrieval (eng-ita)": 22.97, + "XPQARetrieval (ita-eng)": 46.61, + "XPQARetrieval (jpn-jpn)": 69.49, + "XPQARetrieval (eng-jpn)": 25.08, + "XPQARetrieval (jpn-eng)": 54.6, + "XPQARetrieval (kor-kor)": 32.98, + "XPQARetrieval (eng-kor)": 22.38, + "XPQARetrieval (kor-eng)": 22.99, + "XPQARetrieval (pol-pol)": 43.37, + "XPQARetrieval (eng-pol)": 19.89, + "XPQARetrieval (pol-eng)": 28.72, + "XPQARetrieval (por-por)": 41.8, + "XPQARetrieval (eng-por)": 15.79, + "XPQARetrieval (por-eng)": 33.77, + "XPQARetrieval (tam-tam)": 31.65, + "XPQARetrieval (eng-tam)": 13.18, + "XPQARetrieval (tam-eng)": 26.44, + "XPQARetrieval (cmn-cmn)": 63.98, + "XPQARetrieval (eng-cmn)": 16.52, + "XPQARetrieval (cmn-eng)": 45.32, + "XPQARetrieval": 57.47 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "multilingual-e5-small", + "AFQMC": 25.21, + "ATEC": 35.14, + "BIOSSES": 82.46, + "BQ": 43.27, + "CDSC-R": 90.27, + "LCQMC": 72.7, + "PAWSX": 11.0, + "RUParaPhraserSTS": 70.46, + "RuSTSBenchmarkSTS": 78.08, + "SICK-R": 77.51, + "SICK-R-PL": 69.45, + "SICKFr": 74.67, + "STS12": 76.56, + "STS13": 76.97, + "STS14": 75.52, + "STS15": 87.12, + "STS16": 83.63, + "STS17 (it-en)": 77.31, + "STS17 (en-ar)": 57.39, + "STS17 (en-en)": 86.42, + "STS17 (en-tr)": 55.93, + "STS17 (ar-ar)": 73.0, + "STS17 (nl-en)": 75.43, + "STS17 (ko-ko)": 78.87, + "STS17 (fr-en)": 72.28, + "STS17 (es-es)": 84.83, + "STS17 (en-de)": 76.82, + "STS17 (es-en)": 72.43, + "STS22 (pl-en)": 72.69, + "STS22 (de-en)": 56.07, + "STS22 (es)": 66.86, + "STS22 (ar)": 56.65, + "STS22 (ru)": 59.9, + "STS22 (de)": 53.45, + "STS22 (zh-en)": 65.32, + "STS22 (en)": 61.25, + "STS22 (fr)": 76.58, + "STS22 (it)": 76.53, + "STS22 (es-it)": 71.74, + "STS22 (es-en)": 74.2, + "STS22 (de-fr)": 60.62, + "STS22 (tr)": 63.69, + "STS22 (pl)": 35.78, + "STS22 (fr-pl)": 84.52, + "STS22 (zh)": 66.85, + "STS22 (de-pl)": 28.24, + "STSB": 77.73, + "STSBenchmark": 84.11, + "STSBenchmarkMultilingualSTS (zh)": 78.49, + "STSBenchmarkMultilingualSTS (pl)": 72.61, + "STSBenchmarkMultilingualSTS (ru)": 78.24, + "STSBenchmarkMultilingualSTS (en)": 84.11, + "STSBenchmarkMultilingualSTS (pt)": 77.39, + "STSBenchmarkMultilingualSTS (it)": 78.21, + "STSBenchmarkMultilingualSTS (fr)": 79.2, + "STSBenchmarkMultilingualSTS (de)": 79.17, + "STSBenchmarkMultilingualSTS (nl)": 76.04, + "STSBenchmarkMultilingualSTS (es)": 80.31 + }, + { + "Model": "multilingual-e5-small", + "AFQMC": 25.21, + "ATEC": 35.14, + "BIOSSES": 82.46, + "BQ": 43.27, + "CDSC-R": 90.27, + "LCQMC": 72.7, + "PAWSX": 11.0, + "RUParaPhraserSTS": 70.46, + "RuSTSBenchmarkSTS": 78.08, + "SICK-R": 77.51, + "SICK-R-PL": 69.45, + "SICKFr": 74.67, + "STS12": 76.56, + "STS13": 76.97, + "STS14": 75.52, + "STS15": 87.12, + "STS16": 83.63, + "STS17 (it-en)": 77.31, + "STS17 (en-ar)": 57.39, + "STS17 (en-en)": 86.42, + "STS17 (en-tr)": 55.93, + "STS17 (ar-ar)": 73.0, + "STS17 (nl-en)": 75.43, + "STS17 (ko-ko)": 78.87, + "STS17 (fr-en)": 72.28, + "STS17 (es-es)": 84.83, + "STS17 (en-de)": 76.82, + "STS17 (es-en)": 72.43, + "STS22 (pl-en)": 72.69, + "STS22 (de-en)": 56.07, + "STS22 (es)": 66.86, + "STS22 (ar)": 56.65, + "STS22 (ru)": 59.9, + "STS22 (de)": 53.45, + "STS22 (zh-en)": 65.32, + "STS22 (en)": 61.25, + "STS22 (fr)": 76.58, + "STS22 (it)": 76.53, + "STS22 (es-it)": 71.74, + "STS22 (es-en)": 74.2, + "STS22 (de-fr)": 60.62, + "STS22 (tr)": 63.69, + "STS22 (pl)": 35.78, + "STS22 (fr-pl)": 84.52, + "STS22 (zh)": 66.85, + "STS22 (de-pl)": 28.24, + "STSB": 77.73, + "STSBenchmark": 84.11, + "STSBenchmarkMultilingualSTS (zh)": 78.49, + "STSBenchmarkMultilingualSTS (pl)": 72.61, + "STSBenchmarkMultilingualSTS (ru)": 78.24, + "STSBenchmarkMultilingualSTS (en)": 84.11, + "STSBenchmarkMultilingualSTS (pt)": 77.39, + "STSBenchmarkMultilingualSTS (it)": 78.21, + "STSBenchmarkMultilingualSTS (fr)": 79.2, + "STSBenchmarkMultilingualSTS (de)": 79.17, + "STSBenchmarkMultilingualSTS (nl)": 76.04, + "STSBenchmarkMultilingualSTS (es)": 80.31 + }, + { + "Model": "multilingual-e5-small", + "AFQMC": 25.21, + "ATEC": 35.14, + "BQ": 43.27, + "CDSC-R": 90.27, + "LCQMC": 72.7, + "PAWSX": 11.01, + "QBQTC": 30.25, + "SICK-R-PL": 69.46, + "SICKFr": 75.62, + "STS22": 35.8, + "STSB": 77.73, + "STSBenchmarkMultilingualSTS": 79.32 + } + ] + }, + "Summarization": { + "cosine_spearman": [ + { + "Model": "multilingual-e5-small", + "SummEval": 30.04, + "SummEvalFr": 31.14 + }, + { + "Model": "multilingual-e5-small", + "SummEval": 30.04, + "SummEvalFr": 31.14 + }, + { + "Model": "multilingual-e5-small", + "SummEvalFr": 31.85 } ] }, - "STS": { - "cosine_spearman": [ + "MultilabelClassification": { + "accuracy": [ { - "Model": "udever-bloom-560m", - "SICKFr": 54.54, - "STS22 (fr)": 61.35, - "STSBenchmarkMultilingualSTS (fr)": 36.78 + "Model": "multilingual-e5-small", + "CEDRClassification": 40.07, + "SensitiveTopicsClassification": 23.91 } ] }, - "Summarization": { - "cosine_spearman": [ + "InstructionRetrieval": { + "p-MRR": [ { - "Model": "udever-bloom-560m", - "SummEvalFr": 23.63 + "Model": "multilingual-e5-small", + "Core17InstructionRetrieval": -0.26, + "News21InstructionRetrieval": 0.54, + "Robust04InstructionRetrieval": -7.62 } ] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] } }, - "norbert3-large": { + "nb-bert-base": { "BitextMining": { "f1": [ { - "Model": "norbert3-large", - "BornholmBitextMining": 2.9 + "Model": "nb-bert-base", + "BornholmBitextMining": 9.88 } ] }, "Classification": { "accuracy": [ { - "Model": "norbert3-large", - "AngryTweetsClassification": 49.04, - "DKHateClassification": 62.71, - "DanishPoliticalCommentsClassification": 33.53, - "LccSentimentClassification": 46.93, - "MassiveIntentClassification (da)": 45.98, - "MassiveIntentClassification (nb)": 47.42, - "MassiveIntentClassification (sv)": 48.47, - "MassiveScenarioClassification (da)": 50.51, - "MassiveScenarioClassification (nb)": 54.25, - "MassiveScenarioClassification (sv)": 50.6, - "NoRecClassification": 50.46, - "NordicLangClassification": 84.25, - "NorwegianParliament": 58.85, - "ScalaDaClassification": 60.72, - "ScalaNbClassification": 66.79 + "Model": "nb-bert-base", + "AngryTweetsClassification": 52.14, + "DKHateClassification": 61.73, + "DanishPoliticalCommentsClassification": 34.84, + "LccSentimentClassification": 51.4, + "MassiveIntentClassification": 53.89, + "MassiveScenarioClassification": 55.37, + "NoRecClassification": 51.32, + "NordicLangClassification": 84.69, + "NorwegianParliament": 57.41, + "ScalaDaClassification": 57.99, + "ScalaNbClassification": 62.25 } ] }, @@ -15485,179 +15025,50 @@ "p-MRR": [] } }, - "sbert_large_nlu_ru": { + "nb-bert-large": { "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [ - { - "Model": "sbert_large_nlu_ru", - "GeoreviewClassification (rus-Cyrl)": 39.97, - "HeadlineClassification (rus-Cyrl)": 79.26, - "InappropriatenessClassification (rus-Cyrl)": 62.52, - "KinopoiskClassification (rus-Cyrl)": 49.51, - "MassiveIntentClassification (rus-Cyrl)": 61.09, - "MassiveScenarioClassification (rus-Cyrl)": 67.6, - "RuReviewsClassification (rus-Cyrl)": 58.27, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 53.9, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.04 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "sbert_large_nlu_ru", - "GeoreviewClusteringP2P (rus-Cyrl)": 57.12, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 49.7, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.48 - } - ] - }, - "PairClassification": { - "max_ap": [ - { - "Model": "sbert_large_nlu_ru", - "TERRa (rus-Cyrl)": 50.17 - }, - { - "Model": "sbert_large_nlu_ru", - "TERRa (rus-Cyrl)": 50.17 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "sbert_large_nlu_ru", - "MIRACLReranking (rus-Cyrl)": 18.8 - }, - { - "Model": "sbert_large_nlu_ru", - "RuBQReranking (rus-Cyrl)": 46.81 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "sbert_large_nlu_ru", - "MIRACLRetrieval (rus-Cyrl)": 1.98, - "RiaNewsRetrieval (rus-Cyrl)": 11.11, - "RuBQRetrieval (rus-Cyrl)": 12.45 - } - ] - }, - "STS": { - "cosine_spearman": [ - { - "Model": "sbert_large_nlu_ru", - "RUParaPhraserSTS (rus-Cyrl)": 62.06, - "RuSTSBenchmarkSTS (rus-Cyrl)": 58.82, - "STS22 (rus-Cyrl)": 50.75 - } - ] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [ + "f1": [ { - "Model": "sbert_large_nlu_ru", - "CEDRClassification (rus-Cyrl)": 35.84, - "SensitiveTopicsClassification (rus-Cyrl)": 27.9 + "Model": "nb-bert-large", + "BornholmBitextMining": 4.53 } ] }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "flaubert_large_cased": { - "BitextMining": { - "f1": [] - }, "Classification": { "accuracy": [ { - "Model": "flaubert_large_cased", - "AmazonReviewsClassification (fr)": 22.45, - "MTOPDomainClassification (fr)": 24.27, - "MTOPIntentClassification (fr)": 9.79, - "MasakhaNEWSClassification (fra)": 55.64, - "MassiveIntentClassification (fr)": 16.41, - "MassiveScenarioClassification (fr)": 22.72 + "Model": "nb-bert-large", + "AngryTweetsClassification": 52.14, + "DKHateClassification": 62.13, + "DanishPoliticalCommentsClassification": 35.04, + "LccSentimentClassification": 56.27, + "MassiveIntentClassification": 55.02, + "MassiveScenarioClassification": 57.12, + "NoRecClassification": 55.46, + "NordicLangClassification": 85.27, + "NorwegianParliament": 62.58, + "ScalaDaClassification": 62.85, + "ScalaNbClassification": 66.97 } ] }, "Clustering": { - "v_measure": [ - { - "Model": "flaubert_large_cased", - "AlloProfClusteringP2P": 40.85, - "AlloProfClusteringS2S": 21.76, - "HALClusteringS2S": 5.26, - "MLSUMClusteringP2P": 38.09, - "MLSUMClusteringS2S": 18.71, - "MasakhaNEWSClusteringP2P (fra)": 26.43, - "MasakhaNEWSClusteringS2S (fra)": 24.68 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "flaubert_large_cased", - "OpusparcusPC (fr)": 74.78, - "PawsXPairClassification (fr)": 54.14 - }, - { - "Model": "flaubert_large_cased", - "OpusparcusPC (fr)": 85.91, - "PawsXPairClassification (fr)": 54.94 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "flaubert_large_cased", - "AlloprofReranking": 26.29, - "SyntecReranking": 42.8 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "flaubert_large_cased", - "AlloprofRetrieval": 0.58, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 0.26, - "SyntecRetrieval": 1.58, - "XPQARetrieval (fr)": 3.69 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "flaubert_large_cased", - "SICKFr": 34.6, - "STS22 (fr)": 48.52, - "STSBenchmarkMultilingualSTS (fr)": 15.66 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "flaubert_large_cased", - "SummEvalFr": 29.25 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -15666,120 +15077,38 @@ "p-MRR": [] } }, - "sup-simcse-bert-base-uncased": { + "nomic-embed-text-v1": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "sup-simcse-bert-base-uncased", - "AmazonCounterfactualClassification (en)": 75.75, - "AmazonPolarityClassification": 82.47, - "AmazonReviewsClassification (en)": 39.6, - "Banking77Classification": 75.76, - "EmotionClassification": 44.81, - "ImdbClassification": 73.53, - "MTOPDomainClassification (en)": 84.29, - "MTOPIntentClassification (en)": 63.14, - "MassiveIntentClassification (en)": 65.95, - "MassiveScenarioClassification (en)": 70.78, - "ToxicConversationsClassification": 72.04, - "TweetSentimentExtractionClassification": 59.73 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "sup-simcse-bert-base-uncased", - "ArxivClusteringP2P": 35.18, - "ArxivClusteringS2S": 27.54, - "BiorxivClusteringP2P": 30.15, - "BiorxivClusteringS2S": 24.67, - "MedrxivClusteringP2P": 26.25, - "MedrxivClusteringS2S": 24.12, - "RedditClustering": 40.23, - "RedditClusteringP2P": 47.74, - "StackExchangeClustering": 47.55, - "StackExchangeClusteringP2P": 29.45, - "TwentyNewsgroupsClustering": 34.86 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "sup-simcse-bert-base-uncased", - "SprintDuplicateQuestions": 69.39, - "TwitterSemEval2015": 67.75, - "TwitterURLCorpus": 83.89 - }, - { - "Model": "sup-simcse-bert-base-uncased", - "SprintDuplicateQuestions": 73.04, - "TwitterSemEval2015": 67.75, - "TwitterURLCorpus": 83.89 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "sup-simcse-bert-base-uncased", - "AskUbuntuDupQuestions": 51.8, - "MindSmallReranking": 29.3, - "SciDocsRR": 70.14, - "StackOverflowDupQuestions": 38.9 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sup-simcse-bert-base-uncased", - "ArguAna": 38.33, - "CQADupstackRetrieval": 14.5, - "ClimateFEVER": 11.98, - "DBPedia": 19.73, - "FEVER": 20.41, - "FiQA2018": 10.41, - "HotpotQA": 22.9, - "MSMARCO": 11.0, - "NFCorpus": 12.42, - "NQ": 16.08, - "QuoraRetrieval": 79.62, - "SCIDOCS": 7.53, - "SciFact": 29.59, - "TRECCOVID": 22.93, - "Touche2020": 9.9 + "Model": "nomic-embed-text-v1", + "LEMBNarrativeQARetrieval": 41.23, + "LEMBQMSumRetrieval": 36.65, + "LEMBSummScreenFDRetrieval": 92.97, + "LEMBWikimQARetrieval": 73.75 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "sup-simcse-bert-base-uncased", - "BIOSSES": 68.38, - "SICK-R": 80.77, - "STS12": 75.3, - "STS13": 84.67, - "STS14": 80.19, - "STS15": 85.4, - "STS16": 80.82, - "STS17 (en-en)": 89.44, - "STS22 (en)": 61.96, - "STSBenchmark": 84.25 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "sup-simcse-bert-base-uncased", - "SummEval": 31.17 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -15788,59 +15117,120 @@ "p-MRR": [] } }, - "SFR-Embedding-Mistral": { + "nomic-embed-text-v1.5-128": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "nomic-embed-text-v1.5-128", + "AmazonCounterfactualClassification": 69.78, + "AmazonPolarityClassification": 88.74, + "AmazonReviewsClassification": 43.11, + "Banking77Classification": 82.78, + "EmotionClassification": 42.92, + "ImdbClassification": 80.87, + "MTOPDomainClassification": 89.61, + "MTOPIntentClassification": 68.9, + "MassiveIntentClassification": 69.34, + "MassiveScenarioClassification": 74.21, + "ToxicConversationsClassification": 68.16, + "TweetSentimentExtractionClassification": 57.99 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "nomic-embed-text-v1.5-128", + "ArxivClusteringP2P": 43.87, + "ArxivClusteringS2S": 34.57, + "BiorxivClusteringP2P": 36.79, + "BiorxivClusteringS2S": 30.68, + "MedrxivClusteringP2P": 34.09, + "MedrxivClusteringS2S": 31.3, + "RedditClustering": 53.31, + "RedditClusteringP2P": 58.96, + "StackExchangeClustering": 59.92, + "StackExchangeClusteringP2P": 33.88, + "TwentyNewsgroupsClustering": 47.29 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "nomic-embed-text-v1.5-128", + "SprintDuplicateQuestions": 91.45, + "TwitterSemEval2015": 73.23, + "TwitterURLCorpus": 85.93 + }, + { + "Model": "nomic-embed-text-v1.5-128", + "SprintDuplicateQuestions": 91.45, + "TwitterSemEval2015": 73.23, + "TwitterURLCorpus": 85.93 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "nomic-embed-text-v1.5-128", + "AskUbuntuDupQuestions": 61.16, + "MindSmallReranking": 30.02, + "SciDocsRR": 78.05, + "StackOverflowDupQuestions": 49.0 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "SFR-Embedding-Mistral", - "BrightRetrieval (sustainable_living)": 19.79, - "BrightRetrieval (economics)": 17.84, - "BrightRetrieval (theoremqa_theorems)": 24.32, - "BrightRetrieval (aops)": 7.43, - "BrightRetrieval (theoremqa_questions)": 23.05, - "BrightRetrieval (psychology)": 18.97, - "BrightRetrieval (stackoverflow)": 12.72, - "BrightRetrieval (pony)": 1.97, - "BrightRetrieval (leetcode)": 27.35, - "BrightRetrieval (biology)": 19.49, - "BrightRetrieval (earth_science)": 26.63, - "BrightRetrieval (robotics)": 16.7 - } - ], - "recall_at_1": [ - { - "Model": "SFR-Embedding-Mistral", - "BrightRetrieval (earth_science)": 37.0, - "BrightRetrieval (biology)": 30.26, - "BrightRetrieval (stackoverflow)": 14.53, - "BrightRetrieval (sustainable_living)": 34.99, - "BrightRetrieval (psychology)": 47.72, - "BrightRetrieval (pony)": 2.0, - "BrightRetrieval (economics)": 24.27, - "BrightRetrieval (robotics)": 17.33 + "Model": "nomic-embed-text-v1.5-128", + "ArguAna": 43.4, + "CQADupstackRetrieval": 34.67, + "ClimateFEVER": 36.52, + "DBPedia": 36.22, + "FEVER": 80.48, + "FiQA2018": 32.08, + "HotpotQA": 60.09, + "MSMARCO": 39.99, + "NFCorpus": 30.72, + "NQ": 53.62, + "QuoraRetrieval": 87.07, + "SCIDOCS": 15.56, + "SciFact": 64.28, + "TRECCOVID": 74.58, + "Touche2020": 26.99 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "nomic-embed-text-v1.5-128", + "BIOSSES": 80.19, + "SICK-R": 79.09, + "STS12": 77.49, + "STS13": 85.62, + "STS14": 80.5, + "STS15": 85.84, + "STS16": 83.9, + "STS17": 86.27, + "STS22": 64.24, + "STSBenchmark": 84.28 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "nomic-embed-text-v1.5-128", + "SummEval": 29.59 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -15849,118 +15239,118 @@ "p-MRR": [] } }, - "unsup-simcse-bert-base-uncased": { + "nomic-embed-text-v1.5-256": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "unsup-simcse-bert-base-uncased", - "AmazonCounterfactualClassification (en)": 67.09, - "AmazonPolarityClassification": 74.48, - "AmazonReviewsClassification (en)": 33.85, - "Banking77Classification": 73.55, - "EmotionClassification": 42.22, - "ImdbClassification": 69.63, - "MTOPDomainClassification (en)": 81.71, - "MTOPIntentClassification (en)": 59.23, - "MassiveIntentClassification (en)": 59.84, - "MassiveScenarioClassification (en)": 66.25, - "ToxicConversationsClassification": 68.82, - "TweetSentimentExtractionClassification": 53.36 + "Model": "nomic-embed-text-v1.5-256", + "AmazonCounterfactualClassification": 72.94, + "AmazonPolarityClassification": 91.35, + "AmazonReviewsClassification": 45.73, + "Banking77Classification": 83.69, + "EmotionClassification": 45.88, + "ImdbClassification": 83.99, + "MTOPDomainClassification": 91.68, + "MTOPIntentClassification": 72.47, + "MassiveIntentClassification": 71.76, + "MassiveScenarioClassification": 75.67, + "ToxicConversationsClassification": 70.87, + "TweetSentimentExtractionClassification": 59.2 } ] }, "Clustering": { "v_measure": [ { - "Model": "unsup-simcse-bert-base-uncased", - "ArxivClusteringP2P": 32.61, - "ArxivClusteringS2S": 24.68, - "BiorxivClusteringP2P": 24.9, - "BiorxivClusteringS2S": 19.55, - "MedrxivClusteringP2P": 23.6, - "MedrxivClusteringS2S": 21.97, - "RedditClustering": 32.18, - "RedditClusteringP2P": 45.14, - "StackExchangeClustering": 43.07, - "StackExchangeClusteringP2P": 28.5, - "TwentyNewsgroupsClustering": 23.21 + "Model": "nomic-embed-text-v1.5-256", + "ArxivClusteringP2P": 44.82, + "ArxivClusteringS2S": 35.32, + "BiorxivClusteringP2P": 38.19, + "BiorxivClusteringS2S": 31.83, + "MedrxivClusteringP2P": 34.08, + "MedrxivClusteringS2S": 30.98, + "RedditClustering": 54.92, + "RedditClusteringP2P": 60.23, + "StackExchangeClustering": 61.81, + "StackExchangeClusteringP2P": 34.03, + "TwentyNewsgroupsClustering": 48.56 } ] }, "PairClassification": { "max_ap": [ { - "Model": "unsup-simcse-bert-base-uncased", - "SprintDuplicateQuestions": 69.41, - "TwitterSemEval2015": 60.21, - "TwitterURLCorpus": 81.37 + "Model": "nomic-embed-text-v1.5-256", + "SprintDuplicateQuestions": 92.31, + "TwitterSemEval2015": 73.61, + "TwitterURLCorpus": 86.34 }, { - "Model": "unsup-simcse-bert-base-uncased", - "SprintDuplicateQuestions": 78.03, - "TwitterSemEval2015": 61.01, - "TwitterURLCorpus": 81.37 + "Model": "nomic-embed-text-v1.5-256", + "SprintDuplicateQuestions": 92.31, + "TwitterSemEval2015": 73.61, + "TwitterURLCorpus": 86.34 } ] }, "Reranking": { "map": [ { - "Model": "unsup-simcse-bert-base-uncased", - "AskUbuntuDupQuestions": 51.57, - "MindSmallReranking": 28.62, - "SciDocsRR": 66.33, - "StackOverflowDupQuestions": 39.35 + "Model": "nomic-embed-text-v1.5-256", + "AskUbuntuDupQuestions": 61.34, + "MindSmallReranking": 30.04, + "SciDocsRR": 79.4, + "StackOverflowDupQuestions": 49.95 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "unsup-simcse-bert-base-uncased", - "ArguAna": 38.34, - "CQADupstackRetrieval": 13.22, - "ClimateFEVER": 11.8, - "DBPedia": 15.04, - "FEVER": 21.06, - "FiQA2018": 9.84, - "HotpotQA": 19.75, - "MSMARCO": 9.35, - "NFCorpus": 9.88, - "NQ": 11.69, - "QuoraRetrieval": 78.03, - "SCIDOCS": 5.5, - "SciFact": 25.72, - "TRECCOVID": 26.2, - "Touche2020": 8.9 + "Model": "nomic-embed-text-v1.5-256", + "ArguAna": 45.44, + "CQADupstackRetrieval": 37.61, + "ClimateFEVER": 39.63, + "DBPedia": 39.42, + "FEVER": 84.4, + "FiQA2018": 35.0, + "HotpotQA": 67.78, + "MSMARCO": 41.38, + "NFCorpus": 32.54, + "NQ": 57.1, + "QuoraRetrieval": 87.65, + "SCIDOCS": 16.76, + "SciFact": 68.24, + "TRECCOVID": 80.65, + "Touche2020": 28.49 } ] }, "STS": { "cosine_spearman": [ { - "Model": "unsup-simcse-bert-base-uncased", - "BIOSSES": 72.31, - "SICK-R": 72.24, - "STS12": 66.05, - "STS13": 81.49, - "STS14": 73.61, - "STS15": 79.72, - "STS16": 78.12, - "STS17 (en-en)": 83.58, - "STS22 (en)": 59.65, - "STSBenchmark": 76.52 + "Model": "nomic-embed-text-v1.5-256", + "BIOSSES": 81.58, + "SICK-R": 79.24, + "STS12": 78.16, + "STS13": 86.01, + "STS14": 81.25, + "STS15": 86.51, + "STS16": 84.24, + "STS17": 86.44, + "STS22": 65.14, + "STSBenchmark": 84.8 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "unsup-simcse-bert-base-uncased", - "SummEval": 31.15 + "Model": "nomic-embed-text-v1.5-256", + "SummEval": 30.05 } ] }, @@ -15971,87 +15361,118 @@ "p-MRR": [] } }, - "universal-sentence-encoder-multilingual-3": { + "nomic-embed-text-v1.5-512": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "AmazonReviewsClassification (fr)": 33.51, - "MTOPDomainClassification (fr)": 85.5, - "MTOPIntentClassification (fr)": 53.98, - "MasakhaNEWSClassification (fra)": 82.06, - "MassiveIntentClassification (fr)": 61.19, - "MassiveScenarioClassification (fr)": 70.22 + "Model": "nomic-embed-text-v1.5-512", + "AmazonCounterfactualClassification": 74.27, + "AmazonPolarityClassification": 91.89, + "AmazonReviewsClassification": 46.97, + "Banking77Classification": 84.15, + "EmotionClassification": 47.73, + "ImdbClassification": 85.47, + "MTOPDomainClassification": 92.62, + "MTOPIntentClassification": 74.27, + "MassiveIntentClassification": 73.07, + "MassiveScenarioClassification": 76.82, + "ToxicConversationsClassification": 71.25, + "TweetSentimentExtractionClassification": 60.4 } ] }, "Clustering": { "v_measure": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "AlloProfClusteringP2P": 56.9, - "AlloProfClusteringS2S": 37.84, - "HALClusteringS2S": 18.95, - "MLSUMClusteringP2P": 43.9, - "MLSUMClusteringS2S": 35.5, - "MasakhaNEWSClusteringP2P (fra)": 60.57, - "MasakhaNEWSClusteringS2S (fra)": 40.31 + "Model": "nomic-embed-text-v1.5-512", + "ArxivClusteringP2P": 45.45, + "ArxivClusteringS2S": 36.19, + "BiorxivClusteringP2P": 38.41, + "BiorxivClusteringS2S": 32.28, + "MedrxivClusteringP2P": 34.47, + "MedrxivClusteringS2S": 31.43, + "RedditClustering": 55.9, + "RedditClusteringP2P": 60.58, + "StackExchangeClustering": 62.94, + "StackExchangeClusteringP2P": 33.81, + "TwentyNewsgroupsClustering": 49.36 } ] }, "PairClassification": { "max_ap": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "OpusparcusPC (fr)": 91.46, - "PawsXPairClassification (fr)": 52.39 + "Model": "nomic-embed-text-v1.5-512", + "SprintDuplicateQuestions": 92.91, + "TwitterSemEval2015": 74.3, + "TwitterURLCorpus": 86.57 }, { - "Model": "universal-sentence-encoder-multilingual-3", - "OpusparcusPC (fr)": 91.46, - "PawsXPairClassification (fr)": 52.41 + "Model": "nomic-embed-text-v1.5-512", + "SprintDuplicateQuestions": 92.91, + "TwitterSemEval2015": 74.3, + "TwitterURLCorpus": 86.57 } ] }, "Reranking": { "map": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "AlloprofReranking": 56.23, - "SyntecReranking": 73.85 + "Model": "nomic-embed-text-v1.5-512", + "AskUbuntuDupQuestions": 61.6, + "MindSmallReranking": 30.34, + "SciDocsRR": 80.33, + "StackOverflowDupQuestions": 50.32 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "AlloprofRetrieval": 35.27, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 26.12, - "SyntecRetrieval": 69.82, - "XPQARetrieval (fr)": 59.59 + "Model": "nomic-embed-text-v1.5-512", + "ArguAna": 47.45, + "CQADupstackRetrieval": 39.06, + "ClimateFEVER": 40.7, + "DBPedia": 42.96, + "FEVER": 85.7, + "FiQA2018": 36.92, + "HotpotQA": 71.48, + "MSMARCO": 42.29, + "NFCorpus": 33.31, + "NQ": 58.83, + "QuoraRetrieval": 87.87, + "SCIDOCS": 17.88, + "SciFact": 70.12, + "TRECCOVID": 82.12, + "Touche2020": 29.24 } ] }, "STS": { "cosine_spearman": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "SICKFr": 71.37, - "STS22 (fr)": 77.91, - "STSBenchmarkMultilingualSTS (fr)": 75.48 + "Model": "nomic-embed-text-v1.5-512", + "BIOSSES": 83.3, + "SICK-R": 79.27, + "STS12": 78.3, + "STS13": 85.81, + "STS14": 81.38, + "STS15": 86.79, + "STS16": 84.56, + "STS17": 87.25, + "STS22": 65.24, + "STSBenchmark": 85.14 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "SummEvalFr": 28.21 + "Model": "nomic-embed-text-v1.5-512", + "SummEval": 30.47 } ] }, @@ -16062,179 +15483,118 @@ "p-MRR": [] } }, - "gte-Qwen1.5-7B-instruct": { + "nomic-embed-text-v1.5-64": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "AmazonCounterfactualClassification (en)": 83.16, - "AmazonPolarityClassification": 96.7, - "AmazonReviewsClassification (en)": 62.17, - "AmazonReviewsClassification (zh)": 52.95, - "Banking77Classification": 81.68, - "EmotionClassification": 54.53, - "IFlyTek": 53.77, - "ImdbClassification": 95.58, - "JDReview": 88.2, - "MTOPDomainClassification (en)": 95.75, - "MTOPIntentClassification (en)": 84.26, - "MassiveIntentClassification (zh-CN)": 76.25, - "MassiveIntentClassification (en)": 78.47, - "MassiveScenarioClassification (en)": 78.19, - "MassiveScenarioClassification (zh-CN)": 77.26, - "MultilingualSentiment": 77.42, - "OnlineShopping": 94.48, - "TNews": 51.24, - "ToxicConversationsClassification": 78.75, - "TweetSentimentExtractionClassification": 66.0, - "Waimai": 88.63 + "Model": "nomic-embed-text-v1.5-64", + "AmazonCounterfactualClassification": 66.85, + "AmazonPolarityClassification": 85.92, + "AmazonReviewsClassification": 41.02, + "Banking77Classification": 80.63, + "EmotionClassification": 40.55, + "ImdbClassification": 76.6, + "MTOPDomainClassification": 86.31, + "MTOPIntentClassification": 62.77, + "MassiveIntentClassification": 64.95, + "MassiveScenarioClassification": 70.38, + "ToxicConversationsClassification": 66.53, + "TweetSentimentExtractionClassification": 55.23 } ] }, "Clustering": { "v_measure": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "ArxivClusteringP2P": 56.4, - "ArxivClusteringS2S": 51.45, - "BiorxivClusteringP2P": 49.01, - "BiorxivClusteringS2S": 45.06, - "CLSClusteringP2P": 47.21, - "CLSClusteringS2S": 45.79, - "MedrxivClusteringP2P": 44.37, - "MedrxivClusteringS2S": 42.0, - "RedditClustering": 73.37, - "RedditClusteringP2P": 72.51, - "StackExchangeClustering": 79.07, - "StackExchangeClusteringP2P": 49.57, - "ThuNewsClusteringP2P": 87.43, - "ThuNewsClusteringS2S": 87.9, - "TwentyNewsgroupsClustering": 51.31 + "Model": "nomic-embed-text-v1.5-64", + "ArxivClusteringP2P": 41.8, + "ArxivClusteringS2S": 32.41, + "BiorxivClusteringP2P": 34.81, + "BiorxivClusteringS2S": 28.59, + "MedrxivClusteringP2P": 32.73, + "MedrxivClusteringS2S": 29.91, + "RedditClustering": 50.31, + "RedditClusteringP2P": 56.57, + "StackExchangeClustering": 57.99, + "StackExchangeClusteringP2P": 33.64, + "TwentyNewsgroupsClustering": 44.61 } ] }, "PairClassification": { "max_ap": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "Cmnli": 91.81, - "Ocnli": 85.22, - "SprintDuplicateQuestions": 95.99, - "TwitterSemEval2015": 79.36, - "TwitterURLCorpus": 86.79 + "Model": "nomic-embed-text-v1.5-64", + "SprintDuplicateQuestions": 90.06, + "TwitterSemEval2015": 71.68, + "TwitterURLCorpus": 85.03 }, { - "Model": "gte-Qwen1.5-7B-instruct", - "Cmnli": 91.85, - "Ocnli": 85.28, - "SprintDuplicateQuestions": 96.07, - "TwitterSemEval2015": 79.36, - "TwitterURLCorpus": 86.79 + "Model": "nomic-embed-text-v1.5-64", + "SprintDuplicateQuestions": 90.06, + "TwitterSemEval2015": 71.68, + "TwitterURLCorpus": 85.03 } ] }, "Reranking": { "map": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "AskUbuntuDupQuestions": 66.0, - "CMedQAv1": 86.37, - "CMedQAv2": 87.41, - "MindSmallReranking": 32.71, - "SciDocsRR": 87.89, - "StackOverflowDupQuestions": 53.93, - "T2Reranking": 68.11 + "Model": "nomic-embed-text-v1.5-64", + "AskUbuntuDupQuestions": 60.79, + "MindSmallReranking": 29.7, + "SciDocsRR": 75.79, + "StackOverflowDupQuestions": 47.42 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "ArguAna": 62.65, - "BrightRetrieval (stackoverflow)": 19.85, - "BrightRetrieval (earth_science)": 36.22, - "BrightRetrieval (leetcode)": 25.46, - "BrightRetrieval (theoremqa_questions)": 26.97, - "BrightRetrieval (economics)": 17.72, - "BrightRetrieval (robotics)": 13.47, - "BrightRetrieval (pony)": 9.79, - "BrightRetrieval (aops)": 14.36, - "BrightRetrieval (psychology)": 24.61, - "BrightRetrieval (theoremqa_theorems)": 30.8, - "BrightRetrieval (biology)": 30.92, - "BrightRetrieval (sustainable_living)": 14.93, - "CQADupstackRetrieval": 40.64, - "ClimateFEVER": 44.0, - "CmedqaRetrieval": 43.47, - "CovidRetrieval": 80.87, - "DBPedia": 48.04, - "DuRetrieval": 86.01, - "EcomRetrieval": 66.46, - "FEVER": 93.35, - "FiQA2018": 55.31, - "HotpotQA": 72.25, - "MMarcoRetrieval": 73.83, - "MSMARCO": 41.68, - "MedicalRetrieval": 61.33, - "NFCorpus": 38.25, - "NQ": 61.79, - "QuoraRetrieval": 89.61, - "SCIDOCS": 27.69, - "SciFact": 75.31, - "T2Retrieval": 83.58, - "TRECCOVID": 72.72, - "Touche2020": 20.3, - "VideoRetrieval": 69.41 - } - ], - "recall_at_1": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "BrightRetrieval (economics)": 25.73, - "BrightRetrieval (pony)": 1.32, - "BrightRetrieval (robotics)": 21.29, - "BrightRetrieval (biology)": 39.24, - "BrightRetrieval (earth_science)": 36.13, - "BrightRetrieval (stackoverflow)": 23.5, - "BrightRetrieval (psychology)": 42.28, - "BrightRetrieval (sustainable_living)": 33.1 - } - ] - }, - "STS": { - "cosine_spearman": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "AFQMC": 58.47, - "ATEC": 55.46, - "BIOSSES": 81.12, - "BQ": 77.59, - "LCQMC": 76.29, - "PAWSX": 50.22, - "QBQTC": 31.82, - "SICK-R": 79.15, - "STS12": 76.52, - "STS13": 88.63, - "STS14": 83.32, - "STS15": 87.5, - "STS16": 86.39, - "STS17 (en-en)": 87.79, - "STS22 (en)": 66.4, - "STS22 (zh)": 67.36, - "STSB": 81.37, - "STSBenchmark": 87.35 + "Model": "nomic-embed-text-v1.5-64", + "ArguAna": 37.16, + "CQADupstackRetrieval": 28.72, + "ClimateFEVER": 31.48, + "DBPedia": 28.19, + "FEVER": 70.24, + "FiQA2018": 25.78, + "HotpotQA": 43.07, + "MSMARCO": 35.95, + "NFCorpus": 26.03, + "NQ": 45.54, + "QuoraRetrieval": 85.83, + "SCIDOCS": 12.09, + "SciFact": 52.71, + "TRECCOVID": 67.83, + "Touche2020": 23.13 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "nomic-embed-text-v1.5-64", + "BIOSSES": 77.18, + "SICK-R": 78.76, + "STS12": 77.3, + "STS13": 84.18, + "STS14": 79.37, + "STS15": 84.69, + "STS16": 83.36, + "STS17": 85.73, + "STS22": 63.83, + "STSBenchmark": 83.46 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "SummEval": 31.46 + "Model": "nomic-embed-text-v1.5-64", + "SummEval": 28.41 } ] }, @@ -16245,24 +15605,36 @@ "p-MRR": [] } }, - "gottbert-base": { + "norbert3-base": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "norbert3-base", + "BornholmBitextMining": 6.08 + } + ] }, "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [ + "accuracy": [ { - "Model": "gottbert-base", - "BlurbsClusteringP2P": 34.49, - "BlurbsClusteringS2S": 8.37, - "TenKGnadClusteringP2P": 33.66, - "TenKGnadClusteringS2S": 9.34 + "Model": "norbert3-base", + "AngryTweetsClassification": 52.48, + "DKHateClassification": 58.78, + "DanishPoliticalCommentsClassification": 34.14, + "LccSentimentClassification": 54.07, + "MassiveIntentClassification": 52.08, + "MassiveScenarioClassification": 53.53, + "NoRecClassification": 53.4, + "NordicLangClassification": 82.67, + "NorwegianParliament": 59.33, + "ScalaDaClassification": 58.25, + "ScalaNbClassification": 60.19 } ] }, + "Clustering": { + "v_measure": [] + }, "PairClassification": { "max_ap": [] }, @@ -16285,12 +15657,32 @@ "p-MRR": [] } }, - "llama-2-7b-chat": { + "norbert3-large": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "norbert3-large", + "BornholmBitextMining": 2.9 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "norbert3-large", + "AngryTweetsClassification": 49.04, + "DKHateClassification": 62.71, + "DanishPoliticalCommentsClassification": 33.53, + "LccSentimentClassification": 46.93, + "MassiveIntentClassification": 48.47, + "MassiveScenarioClassification": 50.6, + "NoRecClassification": 50.46, + "NordicLangClassification": 84.25, + "NorwegianParliament": 58.85, + "ScalaDaClassification": 60.72, + "ScalaNbClassification": 66.79 + } + ] }, "Clustering": { "v_measure": [] @@ -16314,377 +15706,1518 @@ "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "llama-2-7b-chat", - "Core17InstructionRetrieval": 2.84, - "News21InstructionRetrieval": 0.23, - "Robust04InstructionRetrieval": 2.0 - } - ] + "p-MRR": [] } }, - "voyage-code-2": { + "paraphrase-multilingual-MiniLM-L12-v2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "BUCC": 95.63, + "BornholmBitextMining": 19.67, + "Tatoeba": 95.31, + "Tatoeba (est-eng)": 97.33, + "Tatoeba (aze-eng)": 62.1, + "Tatoeba (oci-eng)": 38.57, + "Tatoeba (eus-eng)": 23.18, + "Tatoeba (sqi-eng)": 98.17, + "Tatoeba (yue-eng)": 71.45, + "Tatoeba (ara-eng)": 87.93, + "Tatoeba (wuu-eng)": 76.0, + "Tatoeba (lvs-eng)": 97.87, + "Tatoeba (ron-eng)": 95.3, + "Tatoeba (uzb-eng)": 17.14, + "Tatoeba (ell-eng)": 95.43, + "Tatoeba (lit-eng)": 93.16, + "Tatoeba (isl-eng)": 24.07, + "Tatoeba (awa-eng)": 33.43, + "Tatoeba (tuk-eng)": 15.16, + "Tatoeba (nld-eng)": 94.58, + "Tatoeba (ido-eng)": 40.25, + "Tatoeba (afr-eng)": 58.22, + "Tatoeba (cmn-eng)": 94.93, + "Tatoeba (max-eng)": 45.25, + "Tatoeba (tgl-eng)": 13.09, + "Tatoeba (ind-eng)": 92.74, + "Tatoeba (mkd-eng)": 91.0, + "Tatoeba (bel-eng)": 67.73, + "Tatoeba (hrv-eng)": 95.98, + "Tatoeba (bre-eng)": 5.56, + "Tatoeba (bul-eng)": 92.65, + "Tatoeba (hun-eng)": 91.58, + "Tatoeba (srp-eng)": 92.24, + "Tatoeba (pol-eng)": 94.28, + "Tatoeba (mhr-eng)": 6.89, + "Tatoeba (fra-eng)": 91.72, + "Tatoeba (lfn-eng)": 47.02, + "Tatoeba (cha-eng)": 15.98, + "Tatoeba (hsb-eng)": 36.1, + "Tatoeba (rus-eng)": 91.87, + "Tatoeba (khm-eng)": 32.11, + "Tatoeba (arq-eng)": 18.6, + "Tatoeba (gla-eng)": 3.61, + "Tatoeba (ces-eng)": 95.12, + "Tatoeba (ita-eng)": 93.05, + "Tatoeba (csb-eng)": 21.56, + "Tatoeba (kor-eng)": 92.52, + "Tatoeba (ile-eng)": 57.71, + "Tatoeba (xho-eng)": 4.52, + "Tatoeba (deu-eng)": 97.02, + "Tatoeba (heb-eng)": 86.88, + "Tatoeba (yid-eng)": 14.38, + "Tatoeba (cym-eng)": 13.25, + "Tatoeba (jpn-eng)": 90.41, + "Tatoeba (tzl-eng)": 25.46, + "Tatoeba (ast-eng)": 62.17, + "Tatoeba (war-eng)": 7.25, + "Tatoeba (tha-eng)": 96.72, + "Tatoeba (fry-eng)": 31.13, + "Tatoeba (dtp-eng)": 5.69, + "Tatoeba (hye-eng)": 93.28, + "Tatoeba (mar-eng)": 92.38, + "Tatoeba (cbk-eng)": 55.37, + "Tatoeba (uig-eng)": 24.39, + "Tatoeba (cor-eng)": 3.42, + "Tatoeba (nov-eng)": 47.99, + "Tatoeba (slv-eng)": 96.92, + "Tatoeba (kzj-eng)": 6.24, + "Tatoeba (dan-eng)": 94.8, + "Tatoeba (hin-eng)": 97.62, + "Tatoeba (ukr-eng)": 92.82, + "Tatoeba (jav-eng)": 17.04, + "Tatoeba (swe-eng)": 94.42, + "Tatoeba (swg-eng)": 26.31, + "Tatoeba (cat-eng)": 94.42, + "Tatoeba (ina-eng)": 79.13, + "Tatoeba (mal-eng)": 32.2, + "Tatoeba (gle-eng)": 11.62, + "Tatoeba (tel-eng)": 36.4, + "Tatoeba (zsm-eng)": 95.31, + "Tatoeba (swh-eng)": 14.48, + "Tatoeba (tam-eng)": 24.64, + "Tatoeba (epo-eng)": 41.73, + "Tatoeba (pms-eng)": 30.7, + "Tatoeba (mon-eng)": 95.04, + "Tatoeba (kat-eng)": 95.44, + "Tatoeba (urd-eng)": 94.57, + "Tatoeba (vie-eng)": 95.12, + "Tatoeba (fin-eng)": 93.1, + "Tatoeba (tur-eng)": 95.08, + "Tatoeba (arz-eng)": 51.26, + "Tatoeba (fao-eng)": 27.51, + "Tatoeba (pes-eng)": 92.59, + "Tatoeba (tat-eng)": 10.25, + "Tatoeba (nds-eng)": 32.16, + "Tatoeba (nno-eng)": 76.34, + "Tatoeba (ber-eng)": 4.43, + "Tatoeba (bos-eng)": 93.27, + "Tatoeba (slk-eng)": 95.15, + "Tatoeba (spa-eng)": 95.42, + "Tatoeba (pam-eng)": 5.41, + "Tatoeba (ben-eng)": 36.48, + "Tatoeba (ang-eng)": 10.24, + "Tatoeba (kur-eng)": 46.94, + "Tatoeba (por-eng)": 92.13, + "Tatoeba (orv-eng)": 15.1, + "Tatoeba (dsb-eng)": 33.43, + "Tatoeba (amh-eng)": 36.21, + "Tatoeba (kab-eng)": 1.16, + "Tatoeba (kaz-eng)": 34.89, + "Tatoeba (nob-eng)": 97.73, + "Tatoeba (ceb-eng)": 8.05, + "Tatoeba (lat-eng)": 19.47, + "Tatoeba (glg-eng)": 94.0, + "Tatoeba (gsw-eng)": 25.74 + } + ] }, "Classification": { "accuracy": [ { - "Model": "voyage-code-2", - "AmazonReviewsClassification (fr)": 42.15, - "MTOPDomainClassification (fr)": 87.68, - "MTOPIntentClassification (fr)": 59.44, - "MasakhaNEWSClassification (fra)": 82.13, - "MassiveIntentClassification (fr)": 63.08, - "MassiveScenarioClassification (fr)": 70.15 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "AllegroReviews": 30.85, + "AmazonCounterfactualClassification": 63.45, + "AmazonCounterfactualClassification (en-ext)": 70.01, + "AmazonCounterfactualClassification (en)": 71.55, + "AmazonCounterfactualClassification (de)": 68.36, + "AmazonCounterfactualClassification (ja)": 63.37, + "AmazonPolarityClassification": 69.21, + "AmazonReviewsClassification": 35.26, + "AmazonReviewsClassification (en)": 35.12, + "AmazonReviewsClassification (de)": 35.91, + "AmazonReviewsClassification (es)": 37.49, + "AmazonReviewsClassification (fr)": 35.29, + "AmazonReviewsClassification (ja)": 33.21, + "AmazonReviewsClassification (zh)": 35.24, + "AngryTweetsClassification": 50.9, + "Banking77Classification": 79.8, + "CBD": 57.71, + "DanishPoliticalCommentsClassification": 37.58, + "EmotionClassification": 42.32, + "GeoreviewClassification": 38.24, + "HeadlineClassification": 68.3, + "IFlyTek": 39.88, + "ImdbClassification": 60.46, + "InappropriatenessClassification": 58.18, + "JDReview": 70.26, + "KinopoiskClassification": 41.45, + "LccSentimentClassification": 54.53, + "MTOPDomainClassification": 79.99, + "MTOPDomainClassification (en)": 87.03, + "MTOPDomainClassification (de)": 79.21, + "MTOPDomainClassification (es)": 83.06, + "MTOPDomainClassification (fr)": 78.64, + "MTOPDomainClassification (hi)": 81.36, + "MTOPDomainClassification (th)": 79.97, + "MTOPIntentClassification": 61.96, + "MTOPIntentClassification (en)": 65.5, + "MTOPIntentClassification (de)": 54.21, + "MTOPIntentClassification (es)": 60.3, + "MTOPIntentClassification (fr)": 54.01, + "MTOPIntentClassification (hi)": 59.92, + "MTOPIntentClassification (th)": 61.97, + "MasakhaNEWSClassification": 76.09, + "MasakhaNEWSClassification (amh)": 64.28, + "MasakhaNEWSClassification (eng)": 74.7, + "MasakhaNEWSClassification (fra)": 71.68, + "MasakhaNEWSClassification (hau)": 47.96, + "MasakhaNEWSClassification (ibo)": 42.46, + "MasakhaNEWSClassification (lin)": 59.26, + "MasakhaNEWSClassification (lug)": 42.29, + "MasakhaNEWSClassification (orm)": 34.98, + "MasakhaNEWSClassification (pcm)": 89.54, + "MasakhaNEWSClassification (run)": 47.2, + "MasakhaNEWSClassification (sna)": 57.56, + "MasakhaNEWSClassification (som)": 34.8, + "MasakhaNEWSClassification (swa)": 46.05, + "MasakhaNEWSClassification (tir)": 27.94, + "MasakhaNEWSClassification (xho)": 44.81, + "MasakhaNEWSClassification (yor)": 52.92, + "MassiveIntentClassification": 57.52, + "MassiveIntentClassification (de)": 50.71, + "MassiveIntentClassification (he)": 52.55, + "MassiveIntentClassification (th)": 58.92, + "MassiveIntentClassification (sw)": 29.56, + "MassiveIntentClassification (zh-TW)": 58.74, + "MassiveIntentClassification (vi)": 56.62, + "MassiveIntentClassification (ko)": 50.36, + "MassiveIntentClassification (en)": 66.89, + "MassiveIntentClassification (hy)": 51.6, + "MassiveIntentClassification (tl)": 33.67, + "MassiveIntentClassification (nl)": 59.52, + "MassiveIntentClassification (ur)": 52.79, + "MassiveIntentClassification (fi)": 57.56, + "MassiveIntentClassification (am)": 36.77, + "MassiveIntentClassification (ka)": 43.03, + "MassiveIntentClassification (it)": 59.66, + "MassiveIntentClassification (kn)": 41.0, + "MassiveIntentClassification (cy)": 26.13, + "MassiveIntentClassification (lv)": 54.72, + "MassiveIntentClassification (sq)": 56.6, + "MassiveIntentClassification (ms)": 54.81, + "MassiveIntentClassification (bn)": 35.38, + "MassiveIntentClassification (hi)": 58.37, + "MassiveIntentClassification (hu)": 60.44, + "MassiveIntentClassification (nb)": 55.52, + "MassiveIntentClassification (mn)": 51.77, + "MassiveIntentClassification (sl)": 57.35, + "MassiveIntentClassification (tr)": 59.91, + "MassiveIntentClassification (fr)": 60.24, + "MassiveIntentClassification (id)": 59.9, + "MassiveIntentClassification (my)": 52.03, + "MassiveIntentClassification (zh-CN)": 62.0, + "MassiveIntentClassification (pl)": 59.48, + "MassiveIntentClassification (es)": 59.7, + "MassiveIntentClassification (ja)": 60.9, + "MassiveIntentClassification (ta)": 36.82, + "MassiveIntentClassification (el)": 58.7, + "MassiveIntentClassification (ar)": 45.15, + "MassiveIntentClassification (pt)": 61.29, + "MassiveIntentClassification (jv)": 32.37, + "MassiveIntentClassification (af)": 45.87, + "MassiveIntentClassification (ru)": 59.06, + "MassiveIntentClassification (te)": 40.77, + "MassiveIntentClassification (az)": 47.43, + "MassiveIntentClassification (is)": 30.87, + "MassiveIntentClassification (ml)": 42.44, + "MassiveIntentClassification (ro)": 58.44, + "MassiveIntentClassification (km)": 40.04, + "MassiveIntentClassification (fa)": 61.03, + "MassiveIntentClassification (da)": 57.75, + "MassiveIntentClassification (sv)": 59.43, + "MassiveScenarioClassification": 64.52, + "MassiveScenarioClassification (zh-CN)": 67.45, + "MassiveScenarioClassification (bn)": 41.17, + "MassiveScenarioClassification (sw)": 34.86, + "MassiveScenarioClassification (es)": 65.07, + "MassiveScenarioClassification (tr)": 66.53, + "MassiveScenarioClassification (ar)": 51.71, + "MassiveScenarioClassification (fi)": 63.74, + "MassiveScenarioClassification (az)": 52.09, + "MassiveScenarioClassification (mn)": 57.07, + "MassiveScenarioClassification (km)": 46.95, + "MassiveScenarioClassification (ro)": 64.2, + "MassiveScenarioClassification (he)": 59.22, + "MassiveScenarioClassification (en)": 71.54, + "MassiveScenarioClassification (am)": 41.89, + "MassiveScenarioClassification (zh-TW)": 65.72, + "MassiveScenarioClassification (ko)": 55.71, + "MassiveScenarioClassification (sl)": 64.01, + "MassiveScenarioClassification (kn)": 45.72, + "MassiveScenarioClassification (da)": 66.87, + "MassiveScenarioClassification (tl)": 37.39, + "MassiveScenarioClassification (de)": 57.4, + "MassiveScenarioClassification (is)": 37.55, + "MassiveScenarioClassification (sq)": 64.34, + "MassiveScenarioClassification (id)": 66.17, + "MassiveScenarioClassification (cy)": 31.71, + "MassiveScenarioClassification (hu)": 66.57, + "MassiveScenarioClassification (nl)": 65.53, + "MassiveScenarioClassification (jv)": 38.62, + "MassiveScenarioClassification (hy)": 56.11, + "MassiveScenarioClassification (th)": 67.05, + "MassiveScenarioClassification (af)": 53.63, + "MassiveScenarioClassification (my)": 59.09, + "MassiveScenarioClassification (fa)": 65.89, + "MassiveScenarioClassification (ja)": 66.49, + "MassiveScenarioClassification (ka)": 50.66, + "MassiveScenarioClassification (vi)": 60.73, + "MassiveScenarioClassification (ta)": 42.63, + "MassiveScenarioClassification (el)": 66.14, + "MassiveScenarioClassification (sv)": 67.14, + "MassiveScenarioClassification (lv)": 59.82, + "MassiveScenarioClassification (ru)": 65.25, + "MassiveScenarioClassification (it)": 65.01, + "MassiveScenarioClassification (ms)": 61.73, + "MassiveScenarioClassification (pt)": 65.83, + "MassiveScenarioClassification (ur)": 60.41, + "MassiveScenarioClassification (pl)": 65.04, + "MassiveScenarioClassification (nb)": 64.25, + "MassiveScenarioClassification (hi)": 65.23, + "MassiveScenarioClassification (te)": 46.49, + "MassiveScenarioClassification (fr)": 66.09, + "MassiveScenarioClassification (ml)": 47.73, + "MultilingualSentiment": 61.9, + "NoRecClassification": 46.7, + "NordicLangClassification": 42.52, + "OnlineShopping": 84.89, + "PAC": 65.75, + "PolEmo2.0-IN": 57.76, + "PolEmo2.0-OUT": 28.66, + "RuReviewsClassification": 58.88, + "RuSciBenchGRNTIClassification": 53.19, + "RuSciBenchOECDClassification": 41.41, + "TNews": 39.19, + "ToxicConversationsClassification": 60.13, + "TweetSentimentExtractionClassification": 56.1, + "Waimai": 82.27 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-code-2", - "AlloProfClusteringP2P": 61.63, - "AlloProfClusteringS2S": 50.67, - "HALClusteringS2S": 27.44, - "MLSUMClusteringP2P": 45.23, - "MLSUMClusteringS2S": 41.48, - "MasakhaNEWSClusteringP2P (fra)": 56.59, - "MasakhaNEWSClusteringS2S (fra)": 35.18 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "8TagsClustering": 23.24, + "AlloProfClusteringP2P": 56.06, + "AlloProfClusteringS2S": 42.16, + "ArxivClusteringP2P": 38.33, + "ArxivClusteringS2S": 31.55, + "BiorxivClusteringP2P": 33.79, + "BiorxivClusteringS2S": 29.41, + "BlurbsClusteringP2P": 32.46, + "BlurbsClusteringS2S": 14.33, + "GeoreviewClusteringP2P": 53.37, + "HALClusteringS2S": 23.21, + "MLSUMClusteringP2P": 39.97, + "MLSUMClusteringP2P (ru)": 37.0, + "MLSUMClusteringS2S": 36.55, + "MLSUMClusteringS2S (ru)": 38.16, + "MasakhaNEWSClusteringP2P": 36.58, + "MasakhaNEWSClusteringP2P (amh)": 40.36, + "MasakhaNEWSClusteringP2P (eng)": 49.96, + "MasakhaNEWSClusteringP2P (fra)": 40.85, + "MasakhaNEWSClusteringP2P (hau)": 19.39, + "MasakhaNEWSClusteringP2P (ibo)": 33.81, + "MasakhaNEWSClusteringP2P (lin)": 51.98, + "MasakhaNEWSClusteringP2P (lug)": 41.88, + "MasakhaNEWSClusteringP2P (orm)": 22.23, + "MasakhaNEWSClusteringP2P (pcm)": 64.64, + "MasakhaNEWSClusteringP2P (run)": 48.03, + "MasakhaNEWSClusteringP2P (sna)": 44.62, + "MasakhaNEWSClusteringP2P (som)": 27.54, + "MasakhaNEWSClusteringP2P (swa)": 22.69, + "MasakhaNEWSClusteringP2P (tir)": 42.02, + "MasakhaNEWSClusteringP2P (xho)": 27.68, + "MasakhaNEWSClusteringP2P (yor)": 27.29, + "MasakhaNEWSClusteringS2S": 33.9, + "MasakhaNEWSClusteringS2S (amh)": 42.28, + "MasakhaNEWSClusteringS2S (eng)": 25.74, + "MasakhaNEWSClusteringS2S (fra)": 36.5, + "MasakhaNEWSClusteringS2S (hau)": 9.2, + "MasakhaNEWSClusteringS2S (ibo)": 33.37, + "MasakhaNEWSClusteringS2S (lin)": 47.76, + "MasakhaNEWSClusteringS2S (lug)": 45.15, + "MasakhaNEWSClusteringS2S (orm)": 22.08, + "MasakhaNEWSClusteringS2S (pcm)": 58.42, + "MasakhaNEWSClusteringS2S (run)": 47.41, + "MasakhaNEWSClusteringS2S (sna)": 43.0, + "MasakhaNEWSClusteringS2S (som)": 26.22, + "MasakhaNEWSClusteringS2S (swa)": 13.53, + "MasakhaNEWSClusteringS2S (tir)": 42.4, + "MasakhaNEWSClusteringS2S (xho)": 21.03, + "MasakhaNEWSClusteringS2S (yor)": 27.04, + "MedrxivClusteringP2P": 31.0, + "MedrxivClusteringS2S": 30.85, + "RedditClustering": 42.64, + "RedditClusteringP2P": 50.1, + "RuSciBenchGRNTIClusteringP2P": 48.22, + "RuSciBenchOECDClusteringP2P": 41.68, + "StackExchangeClustering": 49.29, + "StackExchangeClusteringP2P": 31.67, + "TenKGnadClusteringP2P": 36.13, + "TenKGnadClusteringS2S": 22.26, + "TwentyNewsgroupsClustering": 39.99 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-code-2", - "OpusparcusPC (fr)": 92.87, - "PawsXPairClassification (fr)": 60.83 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "CDSC-E": 72.22, + "OpusparcusPC": 92.01, + "PPC": 91.8, + "PSC": 97.14, + "PawsXPairClassification": 56.94, + "SICK-E-PL": 71.94, + "SprintDuplicateQuestions": 89.46, + "TwitterSemEval2015": 62.06, + "TwitterURLCorpus": 83.83 + }, + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "CDSC-E": 72.32, + "OpusparcusPC": 92.01, + "OpusparcusPC (de)": 96.63, + "OpusparcusPC (en)": 98.59, + "OpusparcusPC (fi)": 93.2, + "OpusparcusPC (fr)": 92.01, + "OpusparcusPC (ru)": 88.25, + "OpusparcusPC (sv)": 93.99, + "PPC": 92.36, + "PSC": 97.14, + "PawsXPairClassification": 57.03, + "PawsXPairClassification (de)": 53.34, + "PawsXPairClassification (en)": 55.94, + "PawsXPairClassification (es)": 54.61, + "PawsXPairClassification (fr)": 57.13, + "PawsXPairClassification (ja)": 48.84, + "PawsXPairClassification (ko)": 49.86, + "PawsXPairClassification (zh)": 54.59, + "SICK-E-PL": 71.94, + "SprintDuplicateQuestions": 91.15, + "TERRa": 58.56, + "TwitterSemEval2015": 65.06, + "TwitterURLCorpus": 83.83 }, { - "Model": "voyage-code-2", - "OpusparcusPC (fr)": 92.87, - "PawsXPairClassification (fr)": 60.88 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "CDSC-E": 72.22, + "OpusparcusPC (de)": 96.63, + "OpusparcusPC (en)": 98.59, + "OpusparcusPC (fi)": 93.2, + "OpusparcusPC (fr)": 92.01, + "OpusparcusPC (ru)": 88.25, + "OpusparcusPC (sv)": 93.99, + "PSC": 97.14, + "PawsXPairClassification (de)": 53.26, + "PawsXPairClassification (en)": 55.94, + "PawsXPairClassification (es)": 54.61, + "PawsXPairClassification (fr)": 56.94, + "PawsXPairClassification (ja)": 48.66, + "PawsXPairClassification (ko)": 49.69, + "PawsXPairClassification (zh)": 54.3, + "SICK-E-PL": 71.94, + "SprintDuplicateQuestions": 89.46, + "TERRa": 58.56, + "TwitterSemEval2015": 62.06, + "TwitterURLCorpus": 83.83 } ] }, "Reranking": { "map": [ { - "Model": "voyage-code-2", - "AlloprofReranking": 70.79, - "SyntecReranking": 86.77 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "AlloprofReranking": 62.42, + "AskUbuntuDupQuestions": 60.49, + "MMarcoReranking": 16.14, + "MindSmallReranking": 30.37, + "RuBQReranking": 52.8, + "SciDocsRR": 77.78, + "StackOverflowDupQuestions": 45.85, + "SyntecReranking": 72.5, + "T2Reranking": 65.28 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-code-2", - "AlloprofRetrieval": 52.61, - "BSARDRetrieval": 0.29, - "MintakaRetrieval (fr)": 19.05, - "SyntecRetrieval": 82.77, - "XPQARetrieval (fr)": 71.95 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "AILACasedocs": 13.66, + "AILAStatutes": 20.52, + "ARCChallenge": 6.19, + "AlloprofRetrieval": 26.63, + "AlphaNLI": 20.89, + "AppsRetrieval": 2.0, + "ArguAna": 44.88, + "ArguAna-PL": 37.86, + "BSARDRetrieval": 9.6, + "CQADupstackRetrieval": 30.7, + "ClimateFEVER": 18.49, + "CmedqaRetrieval": 10.78, + "CodeFeedbackMT": 12.53, + "CodeFeedbackST": 32.53, + "CodeSearchNetCCRetrieval (python)": 44.49, + "CodeSearchNetCCRetrieval (javascript)": 43.56, + "CodeSearchNetCCRetrieval (go)": 28.54, + "CodeSearchNetCCRetrieval (ruby)": 43.84, + "CodeSearchNetCCRetrieval (java)": 26.23, + "CodeSearchNetCCRetrieval (php)": 16.3, + "CodeSearchNetRetrieval (python)": 58.15, + "CodeSearchNetRetrieval (javascript)": 46.41, + "CodeSearchNetRetrieval (go)": 61.49, + "CodeSearchNetRetrieval (ruby)": 56.44, + "CodeSearchNetRetrieval (java)": 32.49, + "CodeSearchNetRetrieval (php)": 45.01, + "CodeTransOceanContest": 25.67, + "CodeTransOceanDL": 11.65, + "CosQA": 14.24, + "CovidRetrieval": 30.11, + "DBPedia": 22.63, + "DBPedia-PL": 18.0, + "DuRetrieval": 34.72, + "EcomRetrieval": 13.32, + "FEVER": 52.66, + "FiQA-PL": 12.49, + "FiQA2018": 20.33, + "GerDaLIRSmall": 2.62, + "HellaSwag": 16.98, + "HotpotQA": 30.01, + "HotpotQA-PL": 22.76, + "LEMBNarrativeQARetrieval": 13.82, + "LEMBQMSumRetrieval": 11.02, + "LEMBSummScreenFDRetrieval": 38.12, + "LEMBWikimQARetrieval": 40.84, + "LeCaRDv2": 32.03, + "LegalBenchConsumerContractsQA": 49.81, + "LegalBenchCorporateLobbying": 88.51, + "LegalQuAD": 13.31, + "LegalSummarization": 54.97, + "MMarcoRetrieval": 46.62, + "MSMARCO": 23.72, + "MSMARCO-PL": 10.39, + "MedicalRetrieval": 15.46, + "MintakaRetrieval": 21.53, + "MintakaRetrieval (ar)": 12.61, + "MintakaRetrieval (de)": 21.77, + "MintakaRetrieval (es)": 21.59, + "MintakaRetrieval (fr)": 21.53, + "MintakaRetrieval (hi)": 16.76, + "MintakaRetrieval (it)": 22.23, + "MintakaRetrieval (ja)": 14.33, + "MintakaRetrieval (pt)": 22.52, + "NFCorpus": 23.45, + "NFCorpus-PL": 17.17, + "NQ": 29.8, + "NQ-PL": 12.56, + "PIQA": 15.79, + "Quail": 2.96, + "Quora-PL": 77.18, + "QuoraRetrieval": 86.55, + "RARbCode": 8.48, + "RARbMath": 30.02, + "RiaNewsRetrieval": 44.82, + "RuBQRetrieval": 29.7, + "SCIDOCS": 13.78, + "SCIDOCS-PL": 10.26, + "SIQA": 0.88, + "SciFact": 48.37, + "SciFact-PL": 40.24, + "SpartQA": 4.94, + "StackOverflowQA": 37.94, + "SyntecRetrieval": 65.54, + "SyntheticText2SQL": 30.57, + "T2Retrieval": 30.31, + "TRECCOVID": 39.12, + "TRECCOVID-PL": 34.23, + "TempReasonL1": 1.43, + "TempReasonL2Fact": 6.21, + "TempReasonL2Pure": 0.22, + "TempReasonL3Fact": 6.77, + "TempReasonL3Pure": 4.9, + "Touche2020": 16.06, + "VideoRetrieval": 14.71, + "WinoGrande": 46.52, + "XPQARetrieval": 42.51, + "XPQARetrieval (ara-ara)": 22.97, + "XPQARetrieval (eng-ara)": 17.17, + "XPQARetrieval (ara-eng)": 25.5, + "XPQARetrieval (deu-deu)": 42.62, + "XPQARetrieval (eng-deu)": 26.52, + "XPQARetrieval (deu-eng)": 48.73, + "XPQARetrieval (spa-spa)": 38.24, + "XPQARetrieval (eng-spa)": 26.09, + "XPQARetrieval (spa-eng)": 41.51, + "XPQARetrieval (fra-fra)": 42.51, + "XPQARetrieval (eng-fra)": 26.09, + "XPQARetrieval (fra-eng)": 43.08, + "XPQARetrieval (hin-hin)": 52.09, + "XPQARetrieval (eng-hin)": 24.08, + "XPQARetrieval (hin-eng)": 49.11, + "XPQARetrieval (ita-ita)": 51.63, + "XPQARetrieval (eng-ita)": 29.34, + "XPQARetrieval (ita-eng)": 46.53, + "XPQARetrieval (jpn-jpn)": 51.57, + "XPQARetrieval (eng-jpn)": 23.87, + "XPQARetrieval (jpn-eng)": 44.93, + "XPQARetrieval (kor-kor)": 21.36, + "XPQARetrieval (eng-kor)": 21.51, + "XPQARetrieval (kor-eng)": 22.59, + "XPQARetrieval (pol-pol)": 28.5, + "XPQARetrieval (eng-pol)": 17.08, + "XPQARetrieval (pol-eng)": 26.54, + "XPQARetrieval (por-por)": 32.33, + "XPQARetrieval (eng-por)": 19.76, + "XPQARetrieval (por-eng)": 34.21, + "XPQARetrieval (tam-tam)": 6.37, + "XPQARetrieval (eng-tam)": 5.36, + "XPQARetrieval (tam-eng)": 9.03, + "XPQARetrieval (cmn-cmn)": 44.16, + "XPQARetrieval (eng-cmn)": 19.03, + "XPQARetrieval (cmn-eng)": 40.08 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-code-2", - "SICKFr": 73.56, - "STS22 (fr)": 79.99, - "STSBenchmarkMultilingualSTS (fr)": 79.02 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "AFQMC": 14.3, + "ATEC": 18.42, + "BIOSSES": 74.18, + "BQ": 38.53, + "CDSC-R": 88.98, + "LCQMC": 63.96, + "PAWSX": 10.13, + "RUParaPhraserSTS": 61.87, + "RuSTSBenchmarkSTS": 79.55, + "SICK-R": 79.61, + "SICK-R-PL": 68.77, + "SICKFr": 75.1, + "STS12": 76.02, + "STS13": 80.7, + "STS14": 78.85, + "STS15": 85.84, + "STS16": 81.05, + "STS17 (es-es)": 85.56, + "STS17 (nl-en)": 81.71, + "STS17 (ko-ko)": 77.03, + "STS17 (en-ar)": 81.22, + "STS17 (it-en)": 82.35, + "STS17 (en-tr)": 76.74, + "STS17 (en-de)": 84.22, + "STS17 (fr-en)": 76.59, + "STS17 (en-en)": 86.87, + "STS17 (ar-ar)": 79.16, + "STS17 (es-en)": 84.44, + "STS22 (pl)": 33.74, + "STS22 (es)": 56.56, + "STS22 (en)": 62.07, + "STS22 (fr)": 70.55, + "STS22 (es-en)": 67.33, + "STS22 (fr-pl)": 50.71, + "STS22 (pl-en)": 69.02, + "STS22 (de-fr)": 51.73, + "STS22 (de)": 44.64, + "STS22 (ar)": 46.2, + "STS22 (de-pl)": 44.22, + "STS22 (zh-en)": 65.71, + "STS22 (it)": 55.22, + "STS22 (tr)": 53.39, + "STS22 (ru)": 57.08, + "STS22 (zh)": 58.75, + "STS22 (es-it)": 47.67, + "STS22 (de-en)": 52.65, + "STSB": 78.91, + "STSBenchmark": 84.42, + "STSBenchmarkMultilingualSTS (es)": 81.1, + "STSBenchmarkMultilingualSTS (zh)": 80.47, + "STSBenchmarkMultilingualSTS (en)": 84.42, + "STSBenchmarkMultilingualSTS (de)": 78.87, + "STSBenchmarkMultilingualSTS (nl)": 79.54, + "STSBenchmarkMultilingualSTS (pl)": 78.29, + "STSBenchmarkMultilingualSTS (fr)": 79.9, + "STSBenchmarkMultilingualSTS (pt)": 80.16, + "STSBenchmarkMultilingualSTS (it)": 80.39, + "STSBenchmarkMultilingualSTS (ru)": 79.32 + }, + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "BIOSSES": 74.18, + "CDSC-R": 88.98, + "SICK-R": 79.61, + "SICK-R-PL": 68.77, + "SICKFr": 75.1, + "STS12": 76.02, + "STS13": 80.7, + "STS14": 78.85, + "STS15": 85.84, + "STS16": 81.05, + "STS17": 81.71, + "STS22": 70.55, + "STSBenchmark": 84.42, + "STSBenchmarkMultilingualSTS": 79.9 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "voyage-code-2", - "SummEvalFr": 28.34 + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "SummEval": 30.67, + "SummEvalFr": 29.2 + }, + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "SummEval": 30.67, + "SummEvalFr": 29.2 + }, + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "SummEval": 30.67, + "SummEvalFr": 29.2 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "CEDRClassification": 37.76, + "SensitiveTopicsClassification": 24.84 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "Core17InstructionRetrieval": -0.62, + "News21InstructionRetrieval": -0.37, + "Robust04InstructionRetrieval": -2.94 + } + ] } }, - "tart-dual-contriever-msmarco": { + "paraphrase-multilingual-mpnet-base-v2": { "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ + "f1": [ { - "Model": "tart-dual-contriever-msmarco", - "Core17InstructionRetrieval": -3.04, - "News21InstructionRetrieval": -2.98, - "Robust04InstructionRetrieval": -8.98 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "BUCC": 97.56, + "BornholmBitextMining": 18.18, + "Tatoeba (rus-eng)": 92.92, + "Tatoeba (slv-eng)": 97.08, + "Tatoeba (fin-eng)": 95.92, + "Tatoeba (arq-eng)": 19.84, + "Tatoeba (ukr-eng)": 92.67, + "Tatoeba (csb-eng)": 23.73, + "Tatoeba (uzb-eng)": 23.19, + "Tatoeba (tgl-eng)": 17.67, + "Tatoeba (nno-eng)": 81.41, + "Tatoeba (xho-eng)": 6.53, + "Tatoeba (jpn-eng)": 92.51, + "Tatoeba (kor-eng)": 93.07, + "Tatoeba (tel-eng)": 79.73, + "Tatoeba (slk-eng)": 96.62, + "Tatoeba (gsw-eng)": 25.12, + "Tatoeba (ceb-eng)": 7.39, + "Tatoeba (pes-eng)": 93.47, + "Tatoeba (cor-eng)": 3.53, + "Tatoeba (mal-eng)": 88.46, + "Tatoeba (deu-eng)": 97.73, + "Tatoeba (glg-eng)": 95.32, + "Tatoeba (isl-eng)": 59.25, + "Tatoeba (tuk-eng)": 14.91, + "Tatoeba (ido-eng)": 43.91, + "Tatoeba (gle-eng)": 16.85, + "Tatoeba (ron-eng)": 96.43, + "Tatoeba (epo-eng)": 55.12, + "Tatoeba (tat-eng)": 10.89, + "Tatoeba (war-eng)": 7.42, + "Tatoeba (hye-eng)": 94.38, + "Tatoeba (arz-eng)": 55.69, + "Tatoeba (nob-eng)": 98.53, + "Tatoeba (amh-eng)": 53.49, + "Tatoeba (dtp-eng)": 5.03, + "Tatoeba (lvs-eng)": 97.53, + "Tatoeba (tam-eng)": 73.6, + "Tatoeba (ben-eng)": 64.9, + "Tatoeba (hin-eng)": 97.75, + "Tatoeba (kat-eng)": 95.46, + "Tatoeba (fao-eng)": 38.24, + "Tatoeba (gla-eng)": 4.72, + "Tatoeba (urd-eng)": 95.12, + "Tatoeba (mar-eng)": 93.83, + "Tatoeba (bul-eng)": 93.52, + "Tatoeba (afr-eng)": 72.96, + "Tatoeba (swg-eng)": 22.8, + "Tatoeba (ber-eng)": 4.88, + "Tatoeba (bos-eng)": 94.02, + "Tatoeba (nld-eng)": 95.5, + "Tatoeba (fra-eng)": 93.12, + "Tatoeba (hrv-eng)": 97.0, + "Tatoeba (dsb-eng)": 36.85, + "Tatoeba (lat-eng)": 24.25, + "Tatoeba (vie-eng)": 97.23, + "Tatoeba (max-eng)": 48.77, + "Tatoeba (uig-eng)": 48.35, + "Tatoeba (srp-eng)": 94.12, + "Tatoeba (nov-eng)": 50.23, + "Tatoeba (ita-eng)": 93.76, + "Tatoeba (swe-eng)": 95.45, + "Tatoeba (kur-eng)": 61.44, + "Tatoeba (ell-eng)": 94.93, + "Tatoeba (orv-eng)": 23.77, + "Tatoeba (nds-eng)": 38.88, + "Tatoeba (mkd-eng)": 93.02, + "Tatoeba (ile-eng)": 60.36, + "Tatoeba (jav-eng)": 23.39, + "Tatoeba (mon-eng)": 96.14, + "Tatoeba (tzl-eng)": 34.21, + "Tatoeba (cmn-eng)": 95.83, + "Tatoeba (pms-eng)": 34.19, + "Tatoeba (cha-eng)": 12.59, + "Tatoeba (yue-eng)": 77.58, + "Tatoeba (lit-eng)": 95.37, + "Tatoeba (oci-eng)": 43.49, + "Tatoeba (tha-eng)": 95.99, + "Tatoeba (khm-eng)": 58.8, + "Tatoeba (ina-eng)": 84.32, + "Tatoeba (lfn-eng)": 49.56, + "Tatoeba (est-eng)": 98.4, + "Tatoeba (tur-eng)": 96.17, + "Tatoeba (kzj-eng)": 5.88, + "Tatoeba (dan-eng)": 96.17, + "Tatoeba (wuu-eng)": 78.25, + "Tatoeba (cbk-eng)": 58.68, + "Tatoeba (bre-eng)": 6.42, + "Tatoeba (awa-eng)": 42.83, + "Tatoeba (kab-eng)": 1.41, + "Tatoeba (pam-eng)": 5.39, + "Tatoeba (por-eng)": 93.02, + "Tatoeba (fry-eng)": 43.54, + "Tatoeba (swh-eng)": 16.02, + "Tatoeba (yid-eng)": 30.73, + "Tatoeba (sqi-eng)": 98.57, + "Tatoeba (hun-eng)": 94.18, + "Tatoeba (heb-eng)": 88.26, + "Tatoeba (ara-eng)": 90.19, + "Tatoeba (cym-eng)": 22.31, + "Tatoeba (spa-eng)": 97.0, + "Tatoeba (bel-eng)": 79.94, + "Tatoeba (kaz-eng)": 61.49, + "Tatoeba (ind-eng)": 93.5, + "Tatoeba (hsb-eng)": 44.32, + "Tatoeba (ang-eng)": 16.72, + "Tatoeba (aze-eng)": 76.36, + "Tatoeba (pol-eng)": 96.95, + "Tatoeba (mhr-eng)": 7.57, + "Tatoeba (ast-eng)": 70.08, + "Tatoeba (cat-eng)": 96.05, + "Tatoeba (zsm-eng)": 95.8, + "Tatoeba (ces-eng)": 95.73, + "Tatoeba (eus-eng)": 31.33, + "Tatoeba": 95.8 } ] - } - }, - "herbert-base-retrieval-v2": { - "BitextMining": { - "f1": [] }, "Classification": { "accuracy": [ { - "Model": "herbert-base-retrieval-v2", - "AllegroReviews": 34.11, - "CBD": 68.35, - "MassiveIntentClassification (pl)": 65.53, - "MassiveScenarioClassification (pl)": 68.51, - "PAC": 68.4, - "PolEmo2.0-IN": 64.18, - "PolEmo2.0-OUT": 45.73 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "AllegroReviews": 33.86, + "AmazonCounterfactualClassification (en-ext)": 76.25, + "AmazonCounterfactualClassification (en)": 75.81, + "AmazonCounterfactualClassification (de)": 69.96, + "AmazonCounterfactualClassification (ja)": 69.78, + "AmazonCounterfactualClassification": 69.79, + "AmazonPolarityClassification": 76.41, + "AmazonReviewsClassification (en)": 38.52, + "AmazonReviewsClassification (de)": 39.53, + "AmazonReviewsClassification (es)": 39.97, + "AmazonReviewsClassification (fr)": 38.98, + "AmazonReviewsClassification (ja)": 36.65, + "AmazonReviewsClassification (zh)": 37.74, + "AmazonReviewsClassification": 37.74, + "AngryTweetsClassification": 54.84, + "Banking77Classification": 81.07, + "CBD": 65.0, + "DanishPoliticalCommentsClassification": 40.96, + "EmotionClassification": 45.83, + "GeoreviewClassification": 42.33, + "HeadlineClassification": 70.35, + "IFlyTek": 43.98, + "ImdbClassification": 64.57, + "InappropriatenessClassification": 59.32, + "JDReview": 70.34, + "KinopoiskClassification": 44.31, + "LccSentimentClassification": 58.4, + "MTOPDomainClassification (en)": 89.24, + "MTOPDomainClassification (de)": 85.73, + "MTOPDomainClassification (es)": 86.98, + "MTOPDomainClassification (fr)": 81.21, + "MTOPDomainClassification (hi)": 84.76, + "MTOPDomainClassification (th)": 82.51, + "MTOPDomainClassification": 82.51, + "MTOPIntentClassification (en)": 68.69, + "MTOPIntentClassification (de)": 61.26, + "MTOPIntentClassification (es)": 66.6, + "MTOPIntentClassification (fr)": 59.75, + "MTOPIntentClassification (hi)": 62.38, + "MTOPIntentClassification (th)": 64.77, + "MTOPIntentClassification": 64.8, + "MasakhaNEWSClassification (amh)": 78.83, + "MasakhaNEWSClassification (eng)": 75.39, + "MasakhaNEWSClassification (fra)": 72.94, + "MasakhaNEWSClassification (hau)": 54.49, + "MasakhaNEWSClassification (ibo)": 46.79, + "MasakhaNEWSClassification (lin)": 69.77, + "MasakhaNEWSClassification (lug)": 43.05, + "MasakhaNEWSClassification (orm)": 41.97, + "MasakhaNEWSClassification (pcm)": 90.2, + "MasakhaNEWSClassification (run)": 49.97, + "MasakhaNEWSClassification (sna)": 59.78, + "MasakhaNEWSClassification (som)": 47.65, + "MasakhaNEWSClassification (swa)": 60.42, + "MasakhaNEWSClassification (tir)": 45.04, + "MasakhaNEWSClassification (xho)": 48.82, + "MasakhaNEWSClassification (yor)": 58.3, + "MasakhaNEWSClassification": 78.1, + "MassiveIntentClassification (km)": 45.48, + "MassiveIntentClassification (sv)": 64.71, + "MassiveIntentClassification (mn)": 56.61, + "MassiveIntentClassification (ja)": 63.76, + "MassiveIntentClassification (zh-TW)": 62.33, + "MassiveIntentClassification (pt)": 64.88, + "MassiveIntentClassification (nl)": 63.57, + "MassiveIntentClassification (fr)": 64.8, + "MassiveIntentClassification (is)": 37.09, + "MassiveIntentClassification (hu)": 63.85, + "MassiveIntentClassification (az)": 56.98, + "MassiveIntentClassification (th)": 61.12, + "MassiveIntentClassification (ta)": 50.18, + "MassiveIntentClassification (sl)": 63.5, + "MassiveIntentClassification (id)": 65.43, + "MassiveIntentClassification (hy)": 57.76, + "MassiveIntentClassification (fi)": 62.26, + "MassiveIntentClassification (he)": 58.25, + "MassiveIntentClassification (ar)": 51.43, + "MassiveIntentClassification (hi)": 62.79, + "MassiveIntentClassification (my)": 57.08, + "MassiveIntentClassification (bn)": 48.79, + "MassiveIntentClassification (ko)": 61.84, + "MassiveIntentClassification (ro)": 62.83, + "MassiveIntentClassification (af)": 52.35, + "MassiveIntentClassification (te)": 52.85, + "MassiveIntentClassification (fa)": 65.33, + "MassiveIntentClassification (ka)": 49.88, + "MassiveIntentClassification (sq)": 62.49, + "MassiveIntentClassification (vi)": 59.71, + "MassiveIntentClassification (lv)": 61.29, + "MassiveIntentClassification (zh-CN)": 65.32, + "MassiveIntentClassification (cy)": 27.89, + "MassiveIntentClassification (da)": 62.8, + "MassiveIntentClassification (es)": 64.45, + "MassiveIntentClassification (sw)": 31.93, + "MassiveIntentClassification (jv)": 36.49, + "MassiveIntentClassification (en)": 69.32, + "MassiveIntentClassification (kn)": 50.62, + "MassiveIntentClassification (ur)": 56.36, + "MassiveIntentClassification (tr)": 64.58, + "MassiveIntentClassification (de)": 59.56, + "MassiveIntentClassification (it)": 64.69, + "MassiveIntentClassification (ml)": 54.34, + "MassiveIntentClassification (ms)": 60.72, + "MassiveIntentClassification (pl)": 64.32, + "MassiveIntentClassification (nb)": 62.62, + "MassiveIntentClassification (ru)": 63.23, + "MassiveIntentClassification (am)": 41.56, + "MassiveIntentClassification (tl)": 38.83, + "MassiveIntentClassification (el)": 62.63, + "MassiveIntentClassification": 61.88, + "MassiveScenarioClassification (jv)": 44.22, + "MassiveScenarioClassification (kn)": 56.08, + "MassiveScenarioClassification (ar)": 57.79, + "MassiveScenarioClassification (mn)": 60.84, + "MassiveScenarioClassification (sw)": 37.26, + "MassiveScenarioClassification (hi)": 67.94, + "MassiveScenarioClassification (pt)": 70.08, + "MassiveScenarioClassification (ru)": 69.92, + "MassiveScenarioClassification (zh-TW)": 68.71, + "MassiveScenarioClassification (fr)": 70.71, + "MassiveScenarioClassification (ms)": 65.85, + "MassiveScenarioClassification (az)": 61.52, + "MassiveScenarioClassification (ka)": 57.3, + "MassiveScenarioClassification (fi)": 67.58, + "MassiveScenarioClassification (id)": 70.73, + "MassiveScenarioClassification (nb)": 70.23, + "MassiveScenarioClassification (th)": 69.44, + "MassiveScenarioClassification (ml)": 60.14, + "MassiveScenarioClassification (el)": 68.81, + "MassiveScenarioClassification (he)": 65.16, + "MassiveScenarioClassification (vi)": 65.7, + "MassiveScenarioClassification (af)": 59.68, + "MassiveScenarioClassification (hu)": 70.31, + "MassiveScenarioClassification (ta)": 55.97, + "MassiveScenarioClassification (it)": 69.74, + "MassiveScenarioClassification (sq)": 69.62, + "MassiveScenarioClassification (fa)": 69.88, + "MassiveScenarioClassification (cy)": 35.27, + "MassiveScenarioClassification (de)": 67.35, + "MassiveScenarioClassification (nl)": 70.37, + "MassiveScenarioClassification (sl)": 70.81, + "MassiveScenarioClassification (ko)": 68.52, + "MassiveScenarioClassification (lv)": 66.28, + "MassiveScenarioClassification (ur)": 62.92, + "MassiveScenarioClassification (is)": 44.16, + "MassiveScenarioClassification (zh-CN)": 71.25, + "MassiveScenarioClassification (da)": 71.04, + "MassiveScenarioClassification (te)": 58.79, + "MassiveScenarioClassification (en)": 75.35, + "MassiveScenarioClassification (sv)": 71.6, + "MassiveScenarioClassification (pl)": 68.99, + "MassiveScenarioClassification (ro)": 67.94, + "MassiveScenarioClassification (hy)": 63.03, + "MassiveScenarioClassification (am)": 48.96, + "MassiveScenarioClassification (ja)": 69.68, + "MassiveScenarioClassification (tr)": 70.41, + "MassiveScenarioClassification (bn)": 54.52, + "MassiveScenarioClassification (es)": 70.4, + "MassiveScenarioClassification (my)": 63.03, + "MassiveScenarioClassification (km)": 53.13, + "MassiveScenarioClassification (tl)": 43.98, + "MassiveScenarioClassification": 67.9, + "MultilingualSentiment": 66.49, + "NoRecClassification": 50.32, + "NordicLangClassification": 41.57, + "OnlineShopping": 87.75, + "PAC": 63.76, + "PolEmo2.0-IN": 62.78, + "PolEmo2.0-OUT": 19.98, + "RuReviewsClassification": 62.33, + "RuSciBenchGRNTIClassification": 56.01, + "RuSciBenchOECDClassification": 44.14, + "TNews": 43.73, + "ToxicConversationsClassification": 71.02, + "TweetSentimentExtractionClassification": 59.03, + "Waimai": 83.97 } ] }, "Clustering": { "v_measure": [ { - "Model": "herbert-base-retrieval-v2", - "8TagsClustering": 28.15 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "8TagsClustering": 25.62, + "AlloProfClusteringP2P": 54.49, + "AlloProfClusteringS2S": 44.79, + "ArxivClusteringP2P": 37.78, + "ArxivClusteringS2S": 31.68, + "BiorxivClusteringP2P": 33.09, + "BiorxivClusteringS2S": 29.6, + "BlurbsClusteringP2P": 34.38, + "BlurbsClusteringS2S": 15.81, + "GeoreviewClusteringP2P": 56.18, + "HALClusteringS2S": 23.97, + "MLSUMClusteringP2P (ru)": 35.95, + "MLSUMClusteringP2P": 40.55, + "MLSUMClusteringS2S (ru)": 38.88, + "MLSUMClusteringS2S": 37.53, + "MasakhaNEWSClusteringP2P (amh)": 46.85, + "MasakhaNEWSClusteringP2P (eng)": 47.3, + "MasakhaNEWSClusteringP2P (fra)": 53.3, + "MasakhaNEWSClusteringP2P (hau)": 27.61, + "MasakhaNEWSClusteringP2P (ibo)": 41.32, + "MasakhaNEWSClusteringP2P (lin)": 58.37, + "MasakhaNEWSClusteringP2P (lug)": 47.56, + "MasakhaNEWSClusteringP2P (orm)": 24.53, + "MasakhaNEWSClusteringP2P (pcm)": 66.55, + "MasakhaNEWSClusteringP2P (run)": 51.97, + "MasakhaNEWSClusteringP2P (sna)": 45.55, + "MasakhaNEWSClusteringP2P (som)": 33.98, + "MasakhaNEWSClusteringP2P (swa)": 25.03, + "MasakhaNEWSClusteringP2P (tir)": 48.33, + "MasakhaNEWSClusteringP2P (xho)": 29.47, + "MasakhaNEWSClusteringP2P (yor)": 28.25, + "MasakhaNEWSClusteringP2P": 41.57, + "MasakhaNEWSClusteringS2S (amh)": 51.54, + "MasakhaNEWSClusteringS2S (eng)": 43.28, + "MasakhaNEWSClusteringS2S (fra)": 37.92, + "MasakhaNEWSClusteringS2S (hau)": 17.97, + "MasakhaNEWSClusteringS2S (ibo)": 34.56, + "MasakhaNEWSClusteringS2S (lin)": 57.43, + "MasakhaNEWSClusteringS2S (lug)": 45.22, + "MasakhaNEWSClusteringS2S (orm)": 21.9, + "MasakhaNEWSClusteringS2S (pcm)": 62.1, + "MasakhaNEWSClusteringS2S (run)": 46.81, + "MasakhaNEWSClusteringS2S (sna)": 43.15, + "MasakhaNEWSClusteringS2S (som)": 29.44, + "MasakhaNEWSClusteringS2S (swa)": 10.31, + "MasakhaNEWSClusteringS2S (tir)": 51.95, + "MasakhaNEWSClusteringS2S (xho)": 21.26, + "MasakhaNEWSClusteringS2S (yor)": 28.88, + "MasakhaNEWSClusteringS2S": 30.88, + "MedrxivClusteringP2P": 31.96, + "MedrxivClusteringS2S": 31.7, + "RedditClustering": 45.24, + "RedditClusteringP2P": 51.31, + "RuSciBenchGRNTIClusteringP2P": 48.47, + "RuSciBenchOECDClusteringP2P": 42.9, + "StackExchangeClustering": 52.98, + "StackExchangeClusteringP2P": 32.94, + "TenKGnadClusteringP2P": 35.96, + "TenKGnadClusteringS2S": 22.0, + "TwentyNewsgroupsClustering": 44.1 } ] }, "PairClassification": { "max_ap": [ { - "Model": "herbert-base-retrieval-v2", - "CDSC-E": 63.31, - "PPC": 84.18, - "PSC": 98.87, - "SICK-E-PL": 54.93 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "CDSC-E": 75.77, + "OpusparcusPC (de)": 97.34, + "OpusparcusPC (en)": 98.59, + "OpusparcusPC (fi)": 95.33, + "OpusparcusPC (fr)": 93.45, + "OpusparcusPC (ru)": 90.47, + "OpusparcusPC (sv)": 95.16, + "PSC": 98.26, + "PawsXPairClassification (de)": 55.69, + "PawsXPairClassification (en)": 60.12, + "PawsXPairClassification (es)": 56.94, + "PawsXPairClassification (fr)": 58.14, + "PawsXPairClassification (ja)": 49.37, + "PawsXPairClassification (ko)": 50.66, + "PawsXPairClassification (zh)": 55.47, + "SICK-E-PL": 77.22, + "SprintDuplicateQuestions": 90.55, + "TERRa": 64.57, + "TwitterSemEval2015": 66.75, + "TwitterURLCorpus": 85.14 }, { - "Model": "herbert-base-retrieval-v2", - "CDSC-E": 63.61, - "PPC": 84.37, - "PSC": 98.98, - "SICK-E-PL": 54.95 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "CDSC-E": 75.76, + "OpusparcusPC (de)": 97.34, + "OpusparcusPC (en)": 98.59, + "OpusparcusPC (fi)": 95.33, + "OpusparcusPC (fr)": 93.45, + "OpusparcusPC (ru)": 90.47, + "OpusparcusPC (sv)": 95.16, + "OpusparcusPC": 93.45, + "PPC": 93.67, + "PSC": 98.26, + "PawsXPairClassification (de)": 55.71, + "PawsXPairClassification (en)": 60.12, + "PawsXPairClassification (es)": 56.94, + "PawsXPairClassification (fr)": 58.33, + "PawsXPairClassification (ja)": 49.37, + "PawsXPairClassification (ko)": 50.78, + "PawsXPairClassification (zh)": 55.47, + "PawsXPairClassification": 58.17, + "SICK-E-PL": 77.22, + "SprintDuplicateQuestions": 91.1, + "TERRa": 64.57, + "TwitterSemEval2015": 68.75, + "TwitterURLCorpus": 85.32 + }, + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "CDSC-E": 75.76, + "OpusparcusPC": 93.45, + "PPC": 93.67, + "PSC": 98.26, + "PawsXPairClassification": 58.14, + "SICK-E-PL": 77.22, + "SprintDuplicateQuestions": 90.55, + "TwitterSemEval2015": 66.75, + "TwitterURLCorpus": 85.14 } ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "AlloprofReranking": 54.34, + "AskUbuntuDupQuestions": 60.16, + "MMarcoReranking": 14.57, + "MindSmallReranking": 30.15, + "RuBQReranking": 58.77, + "SciDocsRR": 78.09, + "StackOverflowDupQuestions": 46.79, + "SyntecReranking": 83.23, + "T2Reranking": 64.49 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "herbert-base-retrieval-v2", - "ArguAna-PL": 41.97, - "DBPedia-PL": 24.07, - "FiQA-PL": 24.25, - "HotpotQA-PL": 43.41, - "MSMARCO-PL": 51.56, - "NFCorpus-PL": 25.95, - "NQ-PL": 35.09, - "Quora-PL": 78.86, - "SCIDOCS-PL": 11.0, - "SciFact-PL": 51.92, - "TRECCOVID-PL": 42.64 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "AILACasedocs": 17.45, + "AILAStatutes": 22.24, + "ARCChallenge": 7.19, + "AlloprofRetrieval": 30.8, + "AlphaNLI": 21.87, + "AppsRetrieval": 2.34, + "ArguAna": 48.91, + "ArguAna-PL": 42.62, + "BSARDRetrieval": 0.0, + "CQADupstackRetrieval": 31.32, + "ClimateFEVER": 15.27, + "CmedqaRetrieval": 10.15, + "CodeFeedbackMT": 11.43, + "CodeFeedbackST": 35.34, + "CodeSearchNetCCRetrieval (python)": 53.43, + "CodeSearchNetCCRetrieval (javascript)": 57.46, + "CodeSearchNetCCRetrieval (go)": 26.63, + "CodeSearchNetCCRetrieval (ruby)": 55.02, + "CodeSearchNetCCRetrieval (java)": 42.1, + "CodeSearchNetCCRetrieval (php)": 29.8, + "CodeSearchNetRetrieval (python)": 65.54, + "CodeSearchNetRetrieval (javascript)": 48.34, + "CodeSearchNetRetrieval (go)": 67.23, + "CodeSearchNetRetrieval (ruby)": 60.19, + "CodeSearchNetRetrieval (java)": 45.56, + "CodeSearchNetRetrieval (php)": 48.3, + "CodeTransOceanContest": 27.94, + "CodeTransOceanDL": 16.71, + "CosQA": 16.57, + "CovidRetrieval": 28.85, + "DBPedia": 26.22, + "DBPedia-PL": 20.18, + "DuRetrieval": 33.41, + "EcomRetrieval": 9.69, + "FEVER": 56.76, + "FiQA-PL": 14.68, + "FiQA2018": 22.96, + "GerDaLIRSmall": 3.0, + "HellaSwag": 17.53, + "HotpotQA": 37.03, + "HotpotQA-PL": 29.36, + "LEMBNarrativeQARetrieval": 16.02, + "LEMBQMSumRetrieval": 12.23, + "LEMBSummScreenFDRetrieval": 41.15, + "LEMBWikimQARetrieval": 38.86, + "LeCaRDv2": 33.91, + "LegalBenchConsumerContractsQA": 52.37, + "LegalBenchCorporateLobbying": 87.62, + "LegalQuAD": 17.8, + "LegalSummarization": 56.8, + "MMarcoRetrieval": 44.62, + "MSMARCO": 26.6, + "MSMARCO-PL": 12.45, + "MedicalRetrieval": 14.1, + "MintakaRetrieval (ar)": 14.55, + "MintakaRetrieval (de)": 25.43, + "MintakaRetrieval (es)": 24.94, + "MintakaRetrieval (fr)": 24.45, + "MintakaRetrieval (hi)": 18.67, + "MintakaRetrieval (it)": 25.62, + "MintakaRetrieval (ja)": 15.46, + "MintakaRetrieval (pt)": 26.15, + "MintakaRetrieval": 24.45, + "NFCorpus": 25.49, + "NFCorpus-PL": 18.53, + "NQ": 33.6, + "NQ-PL": 15.64, + "PIQA": 18.65, + "Quail": 2.98, + "Quora-PL": 79.18, + "QuoraRetrieval": 86.4, + "RARbCode": 11.02, + "RARbMath": 30.93, + "RiaNewsRetrieval": 51.75, + "RuBQRetrieval": 37.04, + "SCIDOCS": 13.97, + "SCIDOCS-PL": 11.18, + "SIQA": 1.21, + "SciFact": 50.3, + "SciFact-PL": 41.53, + "SpartQA": 5.69, + "StackOverflowQA": 43.11, + "SyntecRetrieval": 76.0, + "SyntheticText2SQL": 35.22, + "T2Retrieval": 28.35, + "TRECCOVID": 37.87, + "TRECCOVID-PL": 35.38, + "TempReasonL1": 1.94, + "TempReasonL2Fact": 5.34, + "TempReasonL2Pure": 0.33, + "TempReasonL3Fact": 6.79, + "TempReasonL3Pure": 3.19, + "Touche2020": 17.4, + "VideoRetrieval": 14.18, + "WinoGrande": 49.01, + "XPQARetrieval (ara-ara)": 24.86, + "XPQARetrieval (eng-ara)": 19.6, + "XPQARetrieval (ara-eng)": 28.21, + "XPQARetrieval (deu-deu)": 48.81, + "XPQARetrieval (eng-deu)": 31.93, + "XPQARetrieval (deu-eng)": 53.26, + "XPQARetrieval (spa-spa)": 41.08, + "XPQARetrieval (eng-spa)": 30.05, + "XPQARetrieval (spa-eng)": 43.4, + "XPQARetrieval (fra-fra)": 46.22, + "XPQARetrieval (eng-fra)": 29.55, + "XPQARetrieval (fra-eng)": 47.3, + "XPQARetrieval (hin-hin)": 50.74, + "XPQARetrieval (eng-hin)": 24.97, + "XPQARetrieval (hin-eng)": 49.24, + "XPQARetrieval (ita-ita)": 52.87, + "XPQARetrieval (eng-ita)": 33.44, + "XPQARetrieval (ita-eng)": 51.49, + "XPQARetrieval (jpn-jpn)": 53.17, + "XPQARetrieval (eng-jpn)": 26.66, + "XPQARetrieval (jpn-eng)": 49.86, + "XPQARetrieval (kor-kor)": 24.88, + "XPQARetrieval (eng-kor)": 24.5, + "XPQARetrieval (kor-eng)": 24.61, + "XPQARetrieval (pol-pol)": 29.36, + "XPQARetrieval (eng-pol)": 20.48, + "XPQARetrieval (pol-eng)": 29.31, + "XPQARetrieval (por-por)": 34.3, + "XPQARetrieval (eng-por)": 21.72, + "XPQARetrieval (por-eng)": 37.65, + "XPQARetrieval (tam-tam)": 19.8, + "XPQARetrieval (eng-tam)": 13.93, + "XPQARetrieval (tam-eng)": 18.26, + "XPQARetrieval (cmn-cmn)": 42.54, + "XPQARetrieval (eng-cmn)": 20.91, + "XPQARetrieval (cmn-eng)": 42.81, + "XPQARetrieval": 46.22 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "AFQMC": 15.69, + "ATEC": 20.27, + "BIOSSES": 76.27, + "BQ": 36.33, + "CDSC-R": 88.8, + "LCQMC": 63.3, + "PAWSX": 12.16, + "RUParaPhraserSTS": 65.74, + "RuSTSBenchmarkSTS": 82.46, + "SICK-R": 79.62, + "SICK-R-PL": 73.13, + "SICKFr": 75.56, + "STS12": 77.9, + "STS13": 85.11, + "STS14": 80.81, + "STS15": 87.48, + "STS16": 83.2, + "STS17 (ko-ko)": 83.41, + "STS17 (nl-en)": 82.51, + "STS17 (en-tr)": 74.9, + "STS17 (en-ar)": 80.85, + "STS17 (es-en)": 86.11, + "STS17 (fr-en)": 81.17, + "STS17 (ar-ar)": 79.1, + "STS17 (it-en)": 84.24, + "STS17 (en-de)": 83.28, + "STS17 (es-es)": 85.14, + "STS17 (en-en)": 86.99, + "STS22 (ru)": 58.74, + "STS22 (es-en)": 70.26, + "STS22 (es)": 59.91, + "STS22 (tr)": 56.3, + "STS22 (it)": 60.65, + "STS22 (ar)": 52.19, + "STS22 (pl)": 33.65, + "STS22 (de)": 46.7, + "STS22 (zh)": 61.75, + "STS22 (fr)": 74.3, + "STS22 (de-en)": 50.81, + "STS22 (pl-en)": 73.07, + "STS22 (en)": 63.52, + "STS22 (es-it)": 53.7, + "STS22 (de-fr)": 62.34, + "STS22 (de-pl)": 40.53, + "STS22 (zh-en)": 67.96, + "STS22 (fr-pl)": 84.52, + "STSB": 80.84, + "STSBenchmark": 86.82, + "STSBenchmarkMultilingualSTS (es)": 84.61, + "STSBenchmarkMultilingualSTS (pt)": 84.0, + "STSBenchmarkMultilingualSTS (nl)": 83.36, + "STSBenchmarkMultilingualSTS (de)": 83.56, + "STSBenchmarkMultilingualSTS (pl)": 81.46, + "STSBenchmarkMultilingualSTS (ru)": 82.45, + "STSBenchmarkMultilingualSTS (fr)": 84.69, + "STSBenchmarkMultilingualSTS (en)": 86.82, + "STSBenchmarkMultilingualSTS (it)": 84.09, + "STSBenchmarkMultilingualSTS (zh)": 81.98 + }, + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "BIOSSES": 76.27, + "CDSC-R": 88.8, + "SICK-R": 79.62, + "SICK-R-PL": 73.13, + "SICKFr": 75.56, + "STS12": 77.9, + "STS13": 85.11, + "STS14": 80.81, + "STS15": 87.48, + "STS16": 83.2, + "STS17": 82.51, + "STS22": 74.3, + "STSBenchmark": 86.82, + "STSBenchmarkMultilingualSTS": 84.69 } ] }, - "STS": { + "Summarization": { "cosine_spearman": [ { - "Model": "herbert-base-retrieval-v2", - "CDSC-R": 86.18, - "SICK-R-PL": 64.67, - "STS22 (pl)": 39.73 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "SummEval": 31.57, + "SummEvalFr": 29.47 + }, + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "SummEval": 31.57, + "SummEvalFr": 29.47 + }, + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "SummEval": 31.57, + "SummEvalFr": 29.47 } ] }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "tart-full-flan-t5-xl": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "CEDRClassification": 39.98, + "SensitiveTopicsClassification": 25.83 + } + ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "tart-full-flan-t5-xl", - "Core17InstructionRetrieval": 2.82, - "News21InstructionRetrieval": 1.99, - "Robust04InstructionRetrieval": -0.72 + "Model": "paraphrase-multilingual-mpnet-base-v2", + "Core17InstructionRetrieval": -0.04, + "News21InstructionRetrieval": 0.61, + "Robust04InstructionRetrieval": -3.99 } ] } }, - "contriever": { + "rubert-base-cased": { "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "f1": [ { - "Model": "contriever", - "ARCChallenge": 8.62, - "AlphaNLI": 31.77, - "HellaSwag": 14.42, - "PIQA": 24.64, - "Quail": 4.97, - "RARbCode": 9.28, - "RARbMath": 30.76, - "SIQA": 1.27, - "SpartQA": 10.94, - "TempReasonL1": 1.93, - "TempReasonL2Fact": 22.68, - "TempReasonL2Pure": 1.12, - "TempReasonL3Fact": 20.62, - "TempReasonL3Pure": 7.8, - "WinoGrande": 47.15 + "Model": "rubert-base-cased", + "Tatoeba (rus-eng)": 16.76 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "text-similarity-curie-001": { - "BitextMining": { - "f1": [] - }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "rubert-base-cased", + "GeoreviewClassification": 37.22, + "HeadlineClassification": 75.23, + "InappropriatenessClassification": 57.34, + "KinopoiskClassification": 49.91, + "MassiveIntentClassification (ru)": 53.02, + "MassiveScenarioClassification (ru)": 56.79, + "RuReviewsClassification": 50.74, + "RuSciBenchGRNTIClassification": 48.03, + "RuSciBenchOECDClassification": 36.13 + } + ] }, "Clustering": { "v_measure": [ { - "Model": "text-similarity-curie-001", - "RedditClustering": 40.79, - "StackExchangeClustering": 55.14, - "TwentyNewsgroupsClustering": 37.64 + "Model": "rubert-base-cased", + "GeoreviewClusteringP2P": 28.77, + "MLSUMClusteringP2P (ru)": 41.42, + "MLSUMClusteringS2S (ru)": 40.52, + "RuSciBenchGRNTIClusteringP2P": 28.29, + "RuSciBenchOECDClusteringP2P": 26.67 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-similarity-curie-001", - "SprintDuplicateQuestions": 79.85, - "TwitterSemEval2015": 69.45, - "TwitterURLCorpus": 84.06 + "Model": "rubert-base-cased", + "OpusparcusPC (ru)": 81.65, + "TERRa": 52.12 + }, + { + "Model": "rubert-base-cased", + "OpusparcusPC (ru)": 81.65, + "TERRa": 53.17 } ] }, "Reranking": { "map": [ { - "Model": "text-similarity-curie-001", - "AskUbuntuDupQuestions": 55.09, - "SciDocsRR": 70.93, - "StackOverflowDupQuestions": 42.42 + "Model": "rubert-base-cased", + "MIRACLReranking (ru)": 13.27 + }, + { + "Model": "rubert-base-cased", + "RuBQReranking": 41.65 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-similarity-curie-001", - "FiQA2018": 5.14, - "NFCorpus": 19.96, - "QuoraRetrieval": 83.11, - "SciFact": 46.68, - "TRECCOVID": 7.61 + "Model": "rubert-base-cased", + "MIRACLRetrieval (ru)": 0.88, + "RiaNewsRetrieval": 5.58, + "RuBQRetrieval": 9.52 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text-similarity-curie-001", - "BIOSSES": 77.46, - "SICK-R": 77.26, - "STSBenchmark": 83.02 + "Model": "rubert-base-cased", + "RUParaPhraserSTS": 49.72, + "RuSTSBenchmarkSTS": 53.95, + "STS22 (ru)": 34.98, + "STSBenchmarkMultilingualSTS (ru)": 53.76 } ] }, @@ -16692,962 +17225,695 @@ "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "rubert-base-cased", + "CEDRClassification": 33.59, + "SensitiveTopicsClassification": 18.8 + } + ] }, "InstructionRetrieval": { "p-MRR": [] } }, - "sentence-t5-xl": { + "rubert-base-cased-sentence": { "BitextMining": { "f1": [ { - "Model": "sentence-t5-xl", - "BUCC (de-en)": 95.04, - "BUCC (fr-en)": 94.96, - "BUCC (ru-en)": 8.33, - "BUCC (zh-en)": 1.3, - "Tatoeba (afr-eng)": 41.84, - "Tatoeba (amh-eng)": 0.03, - "Tatoeba (ang-eng)": 37.87, - "Tatoeba (ara-eng)": 0.61, - "Tatoeba (arq-eng)": 0.74, - "Tatoeba (arz-eng)": 0.42, - "Tatoeba (ast-eng)": 65.41, - "Tatoeba (awa-eng)": 1.46, - "Tatoeba (aze-eng)": 8.79, - "Tatoeba (bel-eng)": 5.76, - "Tatoeba (ben-eng)": 0.01, - "Tatoeba (ber-eng)": 5.92, - "Tatoeba (bos-eng)": 16.12, - "Tatoeba (bre-eng)": 6.12, - "Tatoeba (bul-eng)": 9.06, - "Tatoeba (cat-eng)": 57.4, - "Tatoeba (cbk-eng)": 57.68, - "Tatoeba (ceb-eng)": 12.56, - "Tatoeba (ces-eng)": 9.47, - "Tatoeba (cha-eng)": 27.13, - "Tatoeba (cmn-eng)": 1.82, - "Tatoeba (cor-eng)": 3.87, - "Tatoeba (csb-eng)": 14.41, - "Tatoeba (cym-eng)": 6.69, - "Tatoeba (dan-eng)": 54.87, - "Tatoeba (deu-eng)": 93.72, - "Tatoeba (dsb-eng)": 14.74, - "Tatoeba (dtp-eng)": 5.84, - "Tatoeba (ell-eng)": 0.6, - "Tatoeba (epo-eng)": 30.8, - "Tatoeba (est-eng)": 5.39, - "Tatoeba (eus-eng)": 11.9, - "Tatoeba (fao-eng)": 28.08, - "Tatoeba (fin-eng)": 6.81, - "Tatoeba (fra-eng)": 85.29, - "Tatoeba (fry-eng)": 38.68, - "Tatoeba (gla-eng)": 2.96, - "Tatoeba (gle-eng)": 3.74, - "Tatoeba (glg-eng)": 70.0, - "Tatoeba (gsw-eng)": 30.49, - "Tatoeba (heb-eng)": 0.87, - "Tatoeba (hin-eng)": 0.1, - "Tatoeba (hrv-eng)": 17.43, - "Tatoeba (hsb-eng)": 14.69, - "Tatoeba (hun-eng)": 7.28, - "Tatoeba (hye-eng)": 0.77, - "Tatoeba (ido-eng)": 46.65, - "Tatoeba (ile-eng)": 59.43, - "Tatoeba (ina-eng)": 82.71, - "Tatoeba (ind-eng)": 37.26, - "Tatoeba (isl-eng)": 11.21, - "Tatoeba (ita-eng)": 79.77, - "Tatoeba (jav-eng)": 7.81, - "Tatoeba (jpn-eng)": 0.91, - "Tatoeba (kab-eng)": 2.23, - "Tatoeba (kat-eng)": 1.48, - "Tatoeba (kaz-eng)": 1.77, - "Tatoeba (khm-eng)": 0.38, - "Tatoeba (kor-eng)": 1.96, - "Tatoeba (kur-eng)": 12.11, - "Tatoeba (kzj-eng)": 6.13, - "Tatoeba (lat-eng)": 27.84, - "Tatoeba (lfn-eng)": 45.89, - "Tatoeba (lit-eng)": 5.94, - "Tatoeba (lvs-eng)": 8.11, - "Tatoeba (mal-eng)": 0.59, - "Tatoeba (mar-eng)": 0.03, - "Tatoeba (max-eng)": 21.7, - "Tatoeba (mhr-eng)": 0.68, - "Tatoeba (mkd-eng)": 5.92, - "Tatoeba (mon-eng)": 2.39, - "Tatoeba (nds-eng)": 45.04, - "Tatoeba (nld-eng)": 64.75, - "Tatoeba (nno-eng)": 36.74, - "Tatoeba (nob-eng)": 54.77, - "Tatoeba (nov-eng)": 57.12, - "Tatoeba (oci-eng)": 34.39, - "Tatoeba (orv-eng)": 2.04, - "Tatoeba (pam-eng)": 8.34, - "Tatoeba (pes-eng)": 0.87, - "Tatoeba (pms-eng)": 38.06, - "Tatoeba (pol-eng)": 28.35, - "Tatoeba (por-eng)": 83.61, - "Tatoeba (ron-eng)": 65.27, - "Tatoeba (rus-eng)": 30.42, - "Tatoeba (slk-eng)": 13.19, - "Tatoeba (slv-eng)": 13.49, - "Tatoeba (spa-eng)": 89.18, - "Tatoeba (sqi-eng)": 14.66, - "Tatoeba (srp-eng)": 13.24, - "Tatoeba (swe-eng)": 60.67, - "Tatoeba (swg-eng)": 34.76, - "Tatoeba (swh-eng)": 8.07, - "Tatoeba (tam-eng)": 0.36, - "Tatoeba (tat-eng)": 1.46, - "Tatoeba (tel-eng)": 0.67, - "Tatoeba (tgl-eng)": 25.22, - "Tatoeba (tha-eng)": 1.58, - "Tatoeba (tuk-eng)": 4.99, - "Tatoeba (tur-eng)": 7.72, - "Tatoeba (tzl-eng)": 38.49, - "Tatoeba (uig-eng)": 0.87, - "Tatoeba (ukr-eng)": 9.12, - "Tatoeba (urd-eng)": 0.0, - "Tatoeba (uzb-eng)": 5.48, - "Tatoeba (vie-eng)": 8.45, - "Tatoeba (war-eng)": 13.75, - "Tatoeba (wuu-eng)": 1.44, - "Tatoeba (xho-eng)": 9.15, - "Tatoeba (yid-eng)": 0.28, - "Tatoeba (yue-eng)": 0.98, - "Tatoeba (zsm-eng)": 35.71 + "Model": "rubert-base-cased-sentence", + "Tatoeba (rus-eng)": 20.26 } ] }, "Classification": { "accuracy": [ { - "Model": "sentence-t5-xl", - "AmazonCounterfactualClassification (de)": 67.01, - "AmazonCounterfactualClassification (en)": 76.01, - "AmazonCounterfactualClassification (en-ext)": 77.29, - "AmazonCounterfactualClassification (ja)": 45.61, - "AmazonPolarityClassification": 93.17, - "AmazonReviewsClassification (de)": 44.05, - "AmazonReviewsClassification (en)": 48.18, - "AmazonReviewsClassification (es)": 45.01, - "AmazonReviewsClassification (fr)": 43.52, - "AmazonReviewsClassification (ja)": 22.23, - "AmazonReviewsClassification (zh)": 21.88, - "Banking77Classification": 80.88, - "EmotionClassification": 51.95, - "ImdbClassification": 87.54, - "MTOPDomainClassification (de)": 83.28, - "MTOPDomainClassification (en)": 90.73, - "MTOPDomainClassification (es)": 85.32, - "MTOPDomainClassification (fr)": 85.14, - "MTOPDomainClassification (hi)": 20.85, - "MTOPDomainClassification (th)": 15.62, - "MTOPIntentClassification (de)": 54.65, - "MTOPIntentClassification (en)": 68.15, - "MTOPIntentClassification (es)": 57.38, - "MTOPIntentClassification (fr)": 54.39, - "MTOPIntentClassification (hi)": 3.28, - "MTOPIntentClassification (th)": 5.08, - "MasakhaNEWSClassification (fra)": 80.09, - "MassiveIntentClassification (af)": 40.17, - "MassiveIntentClassification (am)": 2.18, - "MassiveIntentClassification (ar)": 4.18, - "MassiveIntentClassification (az)": 30.02, - "MassiveIntentClassification (bn)": 2.6, - "MassiveIntentClassification (cy)": 29.15, - "MassiveIntentClassification (da)": 47.69, - "MassiveIntentClassification (de)": 57.43, - "MassiveIntentClassification (el)": 9.96, - "MassiveIntentClassification (en)": 72.09, - "MassiveIntentClassification (es)": 57.97, - "MassiveIntentClassification (fa)": 3.6, - "MassiveIntentClassification (fi)": 34.02, - "MassiveIntentClassification (fr)": 60.99, - "MassiveIntentClassification (he)": 2.51, - "MassiveIntentClassification (hi)": 3.02, - "MassiveIntentClassification (hu)": 31.66, - "MassiveIntentClassification (hy)": 3.32, - "MassiveIntentClassification (id)": 41.53, - "MassiveIntentClassification (is)": 30.25, - "MassiveIntentClassification (it)": 56.57, - "MassiveIntentClassification (ja)": 3.5, - "MassiveIntentClassification (jv)": 31.67, - "MassiveIntentClassification (ka)": 2.79, - "MassiveIntentClassification (km)": 5.43, - "MassiveIntentClassification (kn)": 2.79, - "MassiveIntentClassification (ko)": 2.67, - "MassiveIntentClassification (lv)": 34.25, - "MassiveIntentClassification (ml)": 2.98, - "MassiveIntentClassification (mn)": 20.99, - "MassiveIntentClassification (ms)": 37.43, - "MassiveIntentClassification (my)": 4.02, - "MassiveIntentClassification (nb)": 45.91, - "MassiveIntentClassification (nl)": 50.51, - "MassiveIntentClassification (pl)": 43.95, - "MassiveIntentClassification (pt)": 57.95, - "MassiveIntentClassification (ro)": 49.37, - "MassiveIntentClassification (ru)": 33.46, - "MassiveIntentClassification (sl)": 36.33, - "MassiveIntentClassification (sq)": 37.65, - "MassiveIntentClassification (sv)": 46.35, - "MassiveIntentClassification (sw)": 30.6, - "MassiveIntentClassification (ta)": 1.79, - "MassiveIntentClassification (te)": 2.26, - "MassiveIntentClassification (th)": 4.02, - "MassiveIntentClassification (tl)": 38.92, - "MassiveIntentClassification (tr)": 32.05, - "MassiveIntentClassification (ur)": 2.7, - "MassiveIntentClassification (vi)": 21.47, - "MassiveIntentClassification (zh-CN)": 0.59, - "MassiveIntentClassification (zh-TW)": 3.24, - "MassiveScenarioClassification (af)": 50.81, - "MassiveScenarioClassification (am)": 6.95, - "MassiveScenarioClassification (ar)": 12.32, - "MassiveScenarioClassification (az)": 38.79, - "MassiveScenarioClassification (bn)": 8.0, - "MassiveScenarioClassification (cy)": 33.91, - "MassiveScenarioClassification (da)": 55.79, - "MassiveScenarioClassification (de)": 65.33, - "MassiveScenarioClassification (el)": 16.89, - "MassiveScenarioClassification (en)": 73.26, - "MassiveScenarioClassification (es)": 62.52, - "MassiveScenarioClassification (fa)": 6.08, - "MassiveScenarioClassification (fi)": 43.34, - "MassiveScenarioClassification (fr)": 66.42, - "MassiveScenarioClassification (he)": 7.55, - "MassiveScenarioClassification (hi)": 7.44, - "MassiveScenarioClassification (hu)": 40.85, - "MassiveScenarioClassification (hy)": 9.25, - "MassiveScenarioClassification (id)": 51.92, - "MassiveScenarioClassification (is)": 40.09, - "MassiveScenarioClassification (it)": 62.94, - "MassiveScenarioClassification (ja)": 7.9, - "MassiveScenarioClassification (jv)": 41.33, - "MassiveScenarioClassification (ka)": 7.76, - "MassiveScenarioClassification (km)": 9.19, - "MassiveScenarioClassification (kn)": 8.36, - "MassiveScenarioClassification (ko)": 6.13, - "MassiveScenarioClassification (lv)": 40.7, - "MassiveScenarioClassification (ml)": 6.98, - "MassiveScenarioClassification (mn)": 27.0, - "MassiveScenarioClassification (ms)": 46.9, - "MassiveScenarioClassification (my)": 9.55, - "MassiveScenarioClassification (nb)": 53.43, - "MassiveScenarioClassification (nl)": 59.65, - "MassiveScenarioClassification (pl)": 49.87, - "MassiveScenarioClassification (pt)": 62.18, - "MassiveScenarioClassification (ro)": 58.22, - "MassiveScenarioClassification (ru)": 40.73, - "MassiveScenarioClassification (sl)": 43.66, - "MassiveScenarioClassification (sq)": 49.25, - "MassiveScenarioClassification (sv)": 57.17, - "MassiveScenarioClassification (sw)": 40.55, - "MassiveScenarioClassification (ta)": 7.46, - "MassiveScenarioClassification (te)": 7.03, - "MassiveScenarioClassification (th)": 8.52, - "MassiveScenarioClassification (tl)": 51.74, - "MassiveScenarioClassification (tr)": 43.01, - "MassiveScenarioClassification (ur)": 9.61, - "MassiveScenarioClassification (vi)": 28.91, - "MassiveScenarioClassification (zh-CN)": 5.86, - "MassiveScenarioClassification (zh-TW)": 7.14, - "ToxicConversationsClassification": 70.95, - "TweetSentimentExtractionClassification": 61.21 + "Model": "rubert-base-cased-sentence", + "GeoreviewClassification": 38.05, + "HeadlineClassification": 67.64, + "InappropriatenessClassification": 58.27, + "KinopoiskClassification": 45.86, + "MassiveIntentClassification (ru)": 49.1, + "MassiveScenarioClassification (ru)": 51.91, + "RuReviewsClassification": 58.34, + "RuSciBenchGRNTIClassification": 52.18, + "RuSciBenchOECDClassification": 40.11 } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-t5-xl", - "AlloProfClusteringP2P": 60.37, - "AlloProfClusteringS2S": 40.76, - "ArxivClusteringP2P": 41.62, - "ArxivClusteringS2S": 31.17, - "BiorxivClusteringP2P": 36.43, - "BiorxivClusteringS2S": 26.47, - "HALClusteringS2S": 20.28, - "MLSUMClusteringP2P": 41.61, - "MLSUMClusteringS2S": 33.6, - "MasakhaNEWSClusteringP2P (fra)": 62.82, - "MasakhaNEWSClusteringS2S (fra)": 31.74, - "MedrxivClusteringP2P": 32.3, - "MedrxivClusteringS2S": 26.93, - "RedditClustering": 57.03, - "RedditClusteringP2P": 62.34, - "StackExchangeClustering": 67.13, - "StackExchangeClusteringP2P": 34.79, - "TwentyNewsgroupsClustering": 49.53 + "Model": "rubert-base-cased-sentence", + "GeoreviewClusteringP2P": 41.82, + "MLSUMClusteringP2P (ru)": 43.71, + "MLSUMClusteringS2S (ru)": 45.94, + "RuSciBenchGRNTIClusteringP2P": 46.29, + "RuSciBenchOECDClusteringP2P": 41.28 } ] }, "PairClassification": { "max_ap": [ { - "Model": "sentence-t5-xl", - "OpusparcusPC (fr)": 92.48, - "PawsXPairClassification (fr)": 62.52, - "SprintDuplicateQuestions": 91.44, - "TwitterSemEval2015": 80.89, - "TwitterURLCorpus": 85.86 + "Model": "rubert-base-cased-sentence", + "OpusparcusPC (ru)": 81.52, + "TERRa": 59.12 }, { - "Model": "sentence-t5-xl", - "OpusparcusPC (fr)": 92.48, - "PawsXPairClassification (fr)": 62.59, - "SprintDuplicateQuestions": 91.44, - "TwitterSemEval2015": 80.89, - "TwitterURLCorpus": 85.86 + "Model": "rubert-base-cased-sentence", + "OpusparcusPC (ru)": 81.76, + "TERRa": 59.12 } ] }, "Reranking": { "map": [ { - "Model": "sentence-t5-xl", - "AlloprofReranking": 63.3, - "AskUbuntuDupQuestions": 62.86, - "MindSmallReranking": 29.77, - "SciDocsRR": 75.16, - "StackOverflowDupQuestions": 51.05, - "SyntecReranking": 83.07 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "sentence-t5-xl", - "AlloprofRetrieval": 40.38, - "ArguAna": 39.4, - "BSARDRetrieval": 0.14, - "CQADupstackRetrieval": 40.78, - "ClimateFEVER": 10.61, - "DBPedia": 33.65, - "FEVER": 36.12, - "FiQA2018": 44.71, - "HotpotQA": 37.17, - "MSMARCO": 25.17, - "MintakaRetrieval (fr)": 31.54, - "NFCorpus": 33.18, - "NQ": 46.29, - "QuoraRetrieval": 85.85, - "SCIDOCS": 15.97, - "SciFact": 50.91, - "SyntecRetrieval": 74.24, - "TRECCOVID": 54.77, - "Touche2020": 22.51, - "XPQARetrieval (fr)": 52.14 + "Model": "rubert-base-cased-sentence", + "MIRACLReranking (ru)": 13.77 + }, + { + "Model": "rubert-base-cased-sentence", + "RuBQReranking": 39.89 } ] }, - "STS": { - "cosine_spearman": [ + "Retrieval": { + "ndcg_at_10": [ { - "Model": "sentence-t5-xl", - "BIOSSES": 73.12, - "SICK-R": 79.98, - "SICKFr": 75.08, - "STS12": 79.02, - "STS13": 88.8, - "STS14": 84.33, - "STS15": 88.89, - "STS16": 85.31, - "STS17 (ar-ar)": 11.13, - "STS17 (en-ar)": -3.93, - "STS17 (en-de)": 79.04, - "STS17 (en-en)": 88.91, - "STS17 (en-tr)": 13.61, - "STS17 (es-en)": 71.72, - "STS17 (es-es)": 83.42, - "STS17 (fr-en)": 71.38, - "STS17 (it-en)": 69.5, - "STS17 (ko-ko)": 9.61, - "STS17 (nl-en)": 66.12, - "STS22 (ar)": 29.6, - "STS22 (de)": 47.72, - "STS22 (de-en)": 49.64, - "STS22 (de-fr)": 62.21, - "STS22 (de-pl)": 34.34, - "STS22 (en)": 64.32, - "STS22 (es)": 58.16, - "STS22 (es-en)": 69.15, - "STS22 (es-it)": 65.26, - "STS22 (fr)": 77.49, - "STS22 (fr-pl)": 50.71, - "STS22 (it)": 66.91, - "STS22 (pl)": 27.04, - "STS22 (pl-en)": 58.85, - "STS22 (ru)": 26.63, - "STS22 (tr)": 43.36, - "STS22 (zh)": 33.55, - "STS22 (zh-en)": 29.0, - "STSBenchmark": 83.93, - "STSBenchmarkMultilingualSTS (fr)": 79.42 + "Model": "rubert-base-cased-sentence", + "MIRACLRetrieval (ru)": 1.92, + "RiaNewsRetrieval": 6.72, + "RuBQRetrieval": 12.63 } ] }, - "Summarization": { + "STS": { "cosine_spearman": [ { - "Model": "sentence-t5-xl", - "SummEval": 29.91, - "SummEvalFr": 31.59 + "Model": "rubert-base-cased-sentence", + "RUParaPhraserSTS": 66.24, + "RuSTSBenchmarkSTS": 66.03, + "STS22 (ru)": 51.27, + "STSBenchmarkMultilingualSTS (ru)": 66.71 } ] }, + "Summarization": { + "cosine_spearman": [] + }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "rubert-base-cased-sentence", + "CEDRClassification": 35.55, + "SensitiveTopicsClassification": 20.05 + } + ] }, "InstructionRetrieval": { "p-MRR": [] } }, - "LLM2Vec-Mistral-supervised": { + "rubert-tiny": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Mistral-supervised", - "AmazonCounterfactualClassification (en)": 77.58, - "AmazonPolarityClassification": 91.12, - "AmazonReviewsClassification (en)": 49.97, - "Banking77Classification": 88.31, - "EmotionClassification": 52.04, - "ImdbClassification": 87.42, - "MTOPDomainClassification (en)": 96.04, - "MTOPIntentClassification (en)": 84.77, - "MassiveIntentClassification (en)": 79.29, - "MassiveScenarioClassification (en)": 81.64, - "ToxicConversationsClassification": 69.26, - "TweetSentimentExtractionClassification": 62.14 + "Model": "rubert-tiny", + "GeoreviewClassification": 33.45, + "HeadlineClassification": 57.65, + "InappropriatenessClassification": 54.5, + "KinopoiskClassification": 41.36, + "MassiveIntentClassification (ru)": 50.1, + "MassiveScenarioClassification (ru)": 52.15, + "RuReviewsClassification": 49.56, + "RuSciBenchGRNTIClassification": 35.71, + "RuSciBenchOECDClassification": 26.51 } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Mistral-supervised", - "ArxivClusteringP2P": 42.81, - "ArxivClusteringS2S": 44.24, - "BiorxivClusteringP2P": 34.27, - "BiorxivClusteringS2S": 35.53, - "MedrxivClusteringP2P": 31.07, - "MedrxivClusteringS2S": 31.27, - "RedditClustering": 60.24, - "RedditClusteringP2P": 64.12, - "StackExchangeClustering": 70.73, - "StackExchangeClusteringP2P": 34.5, - "TwentyNewsgroupsClustering": 52.18 + "Model": "rubert-tiny", + "GeoreviewClusteringP2P": 34.4, + "RuSciBenchGRNTIClusteringP2P": 29.89, + "RuSciBenchOECDClusteringP2P": 27.98 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Mistral-supervised", - "SprintDuplicateQuestions": 96.82, - "TwitterSemEval2015": 80.6, - "TwitterURLCorpus": 86.56 + "Model": "rubert-tiny", + "TERRa": 51.06 }, { - "Model": "LLM2Vec-Mistral-supervised", - "SprintDuplicateQuestions": 96.82, - "TwitterSemEval2015": 80.6, - "TwitterURLCorpus": 86.56 + "Model": "rubert-tiny", + "TERRa": 52.85 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Mistral-supervised", - "AskUbuntuDupQuestions": 63.98, - "MindSmallReranking": 31.5, - "SciDocsRR": 83.8, - "StackOverflowDupQuestions": 54.41 + "Model": "rubert-tiny", + "RuBQReranking": 35.44 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Mistral-supervised", - "ArguAna": 57.48, - "CQADupstackRetrieval": 48.84, - "ClimateFEVER": 35.19, - "DBPedia": 49.58, - "FEVER": 89.4, - "FiQA2018": 53.11, - "HotpotQA": 74.07, - "MSMARCO": 42.17, - "NFCorpus": 39.33, - "NQ": 61.7, - "QuoraRetrieval": 87.75, - "SCIDOCS": 22.5, - "SciFact": 78.86, - "TRECCOVID": 77.69, - "Touche2020": 22.18 + "Model": "rubert-tiny", + "RiaNewsRetrieval": 0.79, + "RuBQRetrieval": 3.24 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Mistral-supervised", - "BIOSSES": 85.24, - "SICK-R": 83.7, - "STS12": 78.8, - "STS13": 86.37, - "STS14": 84.04, - "STS15": 88.99, - "STS16": 87.22, - "STS17 (en-en)": 90.19, - "STS22 (en)": 67.68, - "STSBenchmark": 88.65 + "Model": "rubert-tiny", + "RUParaPhraserSTS": 53.41, + "RuSTSBenchmarkSTS": 58.16, + "STS22 (ru)": 47.88 } ] }, "Summarization": { - "cosine_spearman": [ + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [ { - "Model": "LLM2Vec-Mistral-supervised", - "SummEval": 29.96 + "Model": "rubert-tiny", + "CEDRClassification": 37.39, + "SensitiveTopicsClassification": 18.54 } ] }, - "MultilabelClassification": { - "accuracy": [] - }, "InstructionRetrieval": { "p-MRR": [] } }, - "voyage-lite-02-instruct": { + "rubert-tiny-turbo": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "rubert-tiny-turbo", + "Tatoeba (rus-eng)": 83.14 + } + ] }, "Classification": { "accuracy": [ { - "Model": "voyage-lite-02-instruct", - "AmazonCounterfactualClassification (en)": 88.31, - "AmazonPolarityClassification": 96.32, - "AmazonReviewsClassification (en)": 56.25, - "Banking77Classification": 88.59, - "EmotionClassification": 50.28, - "ImdbClassification": 95.75, - "MTOPDomainClassification (en)": 97.65, - "MTOPIntentClassification (en)": 75.16, - "MassiveIntentClassification (en)": 73.97, - "MassiveScenarioClassification (en)": 83.99, - "ToxicConversationsClassification": 81.75, - "TweetSentimentExtractionClassification": 62.98 + "Model": "rubert-tiny-turbo", + "AmazonPolarityClassification": 68.36, + "Banking77Classification": 59.86, + "EmotionClassification": 29.5, + "GeoreviewClassification": 41.36, + "HeadlineClassification": 68.9, + "ImdbClassification": 58.36, + "InappropriatenessClassification": 59.11, + "KinopoiskClassification": 50.47, + "MassiveIntentClassification (zh-CN)": 5.21, + "MassiveIntentClassification (ko)": 2.53, + "MassiveIntentClassification (hi)": 2.56, + "MassiveIntentClassification (kn)": 2.06, + "MassiveIntentClassification (ka)": 2.64, + "MassiveIntentClassification (am)": 2.28, + "MassiveIntentClassification (my)": 3.96, + "MassiveIntentClassification (el)": 9.66, + "MassiveIntentClassification (lv)": 22.32, + "MassiveIntentClassification (ml)": 2.39, + "MassiveIntentClassification (mn)": 28.99, + "MassiveIntentClassification (ur)": 2.45, + "MassiveIntentClassification (fa)": 3.34, + "MassiveIntentClassification (ro)": 31.72, + "MassiveIntentClassification (is)": 24.85, + "MassiveIntentClassification (en)": 50.16, + "MassiveIntentClassification (hu)": 25.52, + "MassiveIntentClassification (fr)": 31.51, + "MassiveIntentClassification (th)": 3.74, + "MassiveIntentClassification (de)": 32.1, + "MassiveIntentClassification (tr)": 27.56, + "MassiveIntentClassification (pt)": 34.35, + "MassiveIntentClassification (sq)": 32.38, + "MassiveIntentClassification (zh-TW)": 6.81, + "MassiveIntentClassification (hy)": 2.72, + "MassiveIntentClassification (da)": 33.95, + "MassiveIntentClassification (af)": 30.4, + "MassiveIntentClassification (ar)": 3.8, + "MassiveIntentClassification (jv)": 28.53, + "MassiveIntentClassification (te)": 2.21, + "MassiveIntentClassification (tl)": 32.02, + "MassiveIntentClassification (sw)": 27.79, + "MassiveIntentClassification (ja)": 5.61, + "MassiveIntentClassification (ms)": 28.94, + "MassiveIntentClassification (nb)": 32.3, + "MassiveIntentClassification (fi)": 31.13, + "MassiveIntentClassification (id)": 33.56, + "MassiveIntentClassification (cy)": 31.68, + "MassiveIntentClassification (sl)": 31.39, + "MassiveIntentClassification (es)": 31.03, + "MassiveIntentClassification (bn)": 3.08, + "MassiveIntentClassification (sv)": 30.23, + "MassiveIntentClassification (ru)": 57.98, + "MassiveIntentClassification (az)": 23.58, + "MassiveIntentClassification (it)": 35.24, + "MassiveIntentClassification (pl)": 26.82, + "MassiveIntentClassification (vi)": 23.72, + "MassiveIntentClassification (ta)": 1.5, + "MassiveIntentClassification (he)": 2.25, + "MassiveIntentClassification (nl)": 32.44, + "MassiveIntentClassification (km)": 5.14, + "MassiveScenarioClassification (zh-CN)": 10.6, + "MassiveScenarioClassification (ko)": 5.63, + "MassiveScenarioClassification (hi)": 7.41, + "MassiveScenarioClassification (kn)": 7.6, + "MassiveScenarioClassification (ka)": 7.01, + "MassiveScenarioClassification (am)": 7.68, + "MassiveScenarioClassification (my)": 10.73, + "MassiveScenarioClassification (el)": 17.95, + "MassiveScenarioClassification (lv)": 29.29, + "MassiveScenarioClassification (ml)": 6.92, + "MassiveScenarioClassification (mn)": 33.7, + "MassiveScenarioClassification (ur)": 8.53, + "MassiveScenarioClassification (fa)": 6.62, + "MassiveScenarioClassification (ro)": 40.02, + "MassiveScenarioClassification (is)": 33.1, + "MassiveScenarioClassification (en)": 61.29, + "MassiveScenarioClassification (hu)": 36.41, + "MassiveScenarioClassification (fr)": 42.9, + "MassiveScenarioClassification (th)": 8.26, + "MassiveScenarioClassification (de)": 42.07, + "MassiveScenarioClassification (tr)": 34.85, + "MassiveScenarioClassification (pt)": 40.79, + "MassiveScenarioClassification (sq)": 42.66, + "MassiveScenarioClassification (zh-TW)": 11.93, + "MassiveScenarioClassification (hy)": 8.78, + "MassiveScenarioClassification (da)": 43.69, + "MassiveScenarioClassification (af)": 40.84, + "MassiveScenarioClassification (ar)": 11.86, + "MassiveScenarioClassification (jv)": 37.23, + "MassiveScenarioClassification (te)": 6.91, + "MassiveScenarioClassification (tl)": 38.16, + "MassiveScenarioClassification (sw)": 35.66, + "MassiveScenarioClassification (ja)": 10.6, + "MassiveScenarioClassification (ms)": 38.97, + "MassiveScenarioClassification (nb)": 39.05, + "MassiveScenarioClassification (fi)": 35.19, + "MassiveScenarioClassification (id)": 39.54, + "MassiveScenarioClassification (cy)": 39.85, + "MassiveScenarioClassification (sl)": 35.98, + "MassiveScenarioClassification (es)": 37.13, + "MassiveScenarioClassification (bn)": 8.85, + "MassiveScenarioClassification (sv)": 36.12, + "MassiveScenarioClassification (ru)": 62.9, + "MassiveScenarioClassification (az)": 30.32, + "MassiveScenarioClassification (it)": 42.69, + "MassiveScenarioClassification (pl)": 31.62, + "MassiveScenarioClassification (vi)": 31.89, + "MassiveScenarioClassification (ta)": 7.01, + "MassiveScenarioClassification (he)": 7.61, + "MassiveScenarioClassification (nl)": 40.94, + "MassiveScenarioClassification (km)": 8.51, + "RuReviewsClassification": 60.66, + "RuSciBenchGRNTIClassification": 52.93, + "RuSciBenchOECDClassification": 40.79, + "ToxicConversationsClassification": 57.77, + "TweetSentimentExtractionClassification": 55.3 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-lite-02-instruct", - "ArxivClusteringP2P": 51.95, - "ArxivClusteringS2S": 42.48, - "BiorxivClusteringP2P": 50.15, - "BiorxivClusteringS2S": 42.84, - "MedrxivClusteringP2P": 47.24, - "MedrxivClusteringS2S": 43.48, - "RedditClustering": 63.73, - "RedditClusteringP2P": 64.09, - "StackExchangeClustering": 70.71, - "StackExchangeClusteringP2P": 40.34, - "TwentyNewsgroupsClustering": 59.56 + "Model": "rubert-tiny-turbo", + "ArxivClusteringP2P": 24.83, + "ArxivClusteringS2S": 16.68, + "BiorxivClusteringP2P": 20.0, + "BiorxivClusteringS2S": 12.67, + "GeoreviewClusteringP2P": 59.71, + "MLSUMClusteringP2P (ru)": 40.02, + "MLSUMClusteringS2S (ru)": 41.36, + "MedrxivClusteringP2P": 20.79, + "MedrxivClusteringS2S": 18.18, + "RedditClustering": 26.28, + "RedditClusteringP2P": 40.48, + "RuSciBenchGRNTIClusteringP2P": 47.55, + "RuSciBenchOECDClusteringP2P": 41.44, + "StackExchangeClustering": 33.51, + "StackExchangeClusteringP2P": 27.98, + "TwentyNewsgroupsClustering": 19.9 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-lite-02-instruct", - "SprintDuplicateQuestions": 98.07, - "TwitterSemEval2015": 74.44, - "TwitterURLCorpus": 88.11 + "Model": "rubert-tiny-turbo", + "OpusparcusPC (ru)": 87.58, + "TERRa": 56.09 }, { - "Model": "voyage-lite-02-instruct", - "SprintDuplicateQuestions": 98.07, - "TwitterSemEval2015": 74.44, - "TwitterURLCorpus": 88.11 + "Model": "rubert-tiny-turbo", + "OpusparcusPC (ru)": 87.58, + "TERRa": 56.27 } ] }, "Reranking": { "map": [ { - "Model": "voyage-lite-02-instruct", - "AskUbuntuDupQuestions": 63.24, - "MindSmallReranking": 31.48, - "SciDocsRR": 84.68, - "StackOverflowDupQuestions": 53.56 + "Model": "rubert-tiny-turbo", + "MIRACLReranking (ru)": 47.73 + }, + { + "Model": "rubert-tiny-turbo", + "RuBQReranking": 62.15 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-lite-02-instruct", - "ArguAna": 70.28, - "CQADupstackRetrieval": 46.2, - "ClimateFEVER": 31.95, - "DBPedia": 39.79, - "FEVER": 91.35, - "FiQA2018": 52.51, - "HotpotQA": 75.51, - "MSMARCO": 37.93, - "NFCorpus": 43.7, - "NQ": 64.26, - "QuoraRetrieval": 87.62, - "SCIDOCS": 20.24, - "SciFact": 79.91, - "TRECCOVID": 81.02, - "Touche2020": 26.8 + "Model": "rubert-tiny-turbo", + "AILACasedocs": 7.43, + "AILAStatutes": 13.62, + "ARCChallenge": 3.85, + "AlphaNLI": 14.15, + "ArguAna": 32.03, + "ClimateFEVER": 5.56, + "DBPedia": 9.61, + "MIRACLRetrieval (ru)": 37.07, + "RiaNewsRetrieval": 51.27, + "RuBQRetrieval": 51.73 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-lite-02-instruct", - "BIOSSES": 89.7, - "SICK-R": 78.44, - "STS12": 86.46, - "STS13": 87.76, - "STS14": 86.6, - "STS15": 90.1, - "STS16": 86.39, - "STS17 (en-en)": 86.98, - "STS22 (en)": 76.89, - "STSBenchmark": 88.56 + "Model": "rubert-tiny-turbo", + "RUParaPhraserSTS": 72.15, + "RuSTSBenchmarkSTS": 78.48, + "STS22 (zh)": 32.83, + "STS22 (de-fr)": 17.5, + "STS22 (pl-en)": 42.08, + "STS22 (ru)": 60.06, + "STS22 (fr)": 42.0, + "STS22 (de)": 8.16, + "STS22 (tr)": 15.46, + "STS22 (de-en)": 21.55, + "STS22 (it)": 39.69, + "STS22 (pl)": 9.71, + "STS22 (fr-pl)": 39.44, + "STS22 (de-pl)": 25.53, + "STS22 (ar)": 27.95, + "STS22 (es-en)": 42.77, + "STS22 (es-it)": 32.83, + "STS22 (es)": 45.31, + "STS22 (zh-en)": 31.25, + "STS22 (en)": 47.06, + "STSBenchmarkMultilingualSTS (ru)": 78.12 } ] }, "Summarization": { - "cosine_spearman": [ + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [ { - "Model": "voyage-lite-02-instruct", - "SummEval": 31.01 + "Model": "rubert-tiny-turbo", + "CEDRClassification": 38.95, + "SensitiveTopicsClassification": 24.44 } ] }, - "MultilabelClassification": { - "accuracy": [] - }, "InstructionRetrieval": { "p-MRR": [] } }, - "text-embedding-ada-002-instruct": { + "rubert-tiny2": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "rubert-tiny2", + "GeoreviewClassification": 39.64, + "HeadlineClassification": 74.19, + "InappropriatenessClassification": 58.57, + "KinopoiskClassification": 49.06, + "MassiveIntentClassification (ru)": 50.83, + "MassiveScenarioClassification (ru)": 59.15, + "RuReviewsClassification": 56.99, + "RuSciBenchGRNTIClassification": 45.63, + "RuSciBenchOECDClassification": 35.48 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "rubert-tiny2", + "GeoreviewClusteringP2P": 41.58, + "RuSciBenchGRNTIClusteringP2P": 39.78, + "RuSciBenchOECDClusteringP2P": 35.98 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "rubert-tiny2", + "TERRa": 51.87 + }, + { + "Model": "rubert-tiny2", + "TERRa": 51.87 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "rubert-tiny2", + "MIRACLReranking (ru)": 15.81 + }, + { + "Model": "rubert-tiny2", + "RuBQReranking": 46.09 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-ada-002-instruct", - "ARCChallenge": 11.85, - "AlphaNLI": 10.62, - "HellaSwag": 24.8, - "PIQA": 23.87, - "Quail": 5.79, - "RARbCode": 82.36, - "RARbMath": 67.26, - "SIQA": 2.64, - "SpartQA": 4.75, - "TempReasonL1": 1.44, - "TempReasonL2Fact": 19.38, - "TempReasonL2Pure": 2.43, - "TempReasonL3Fact": 17.58, - "TempReasonL3Pure": 7.31, - "WinoGrande": 11.36 + "Model": "rubert-tiny2", + "MIRACLRetrieval (ru)": 1.89, + "RiaNewsRetrieval": 13.92, + "RuBQRetrieval": 10.87 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "rubert-tiny2", + "RUParaPhraserSTS": 65.14, + "RuSTSBenchmarkSTS": 69.43, + "STS22 (ru)": 50.23 + } + ] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "rubert-tiny2", + "CEDRClassification": 36.87, + "SensitiveTopicsClassification": 22.03 + } + ] }, "InstructionRetrieval": { "p-MRR": [] } }, - "text-embedding-3-large": { + "sbert_large_mt_nlu_ru": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "text-embedding-3-large", - "AmazonCounterfactualClassification (en)": 78.93, - "AmazonPolarityClassification": 92.85, - "AmazonReviewsClassification (en)": 48.7, - "Banking77Classification": 85.69, - "EmotionClassification": 51.58, - "ImdbClassification": 87.67, - "MTOPDomainClassification (en)": 95.36, - "MTOPIntentClassification (en)": 75.07, - "MassiveIntentClassification (en)": 74.64, - "MassiveScenarioClassification (en)": 79.79, - "ToxicConversationsClassification": 72.92, - "TweetSentimentExtractionClassification": 62.22 + "Model": "sbert_large_mt_nlu_ru", + "GeoreviewClassification": 39.67, + "HeadlineClassification": 77.19, + "InappropriatenessClassification": 64.64, + "KinopoiskClassification": 50.33, + "MassiveIntentClassification (ru)": 61.42, + "MassiveScenarioClassification (ru)": 68.13, + "RuReviewsClassification": 58.29, + "RuSciBenchGRNTIClassification": 54.19, + "RuSciBenchOECDClassification": 43.8 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-embedding-3-large", - "ArxivClusteringP2P": 49.01, - "ArxivClusteringS2S": 44.45, - "BiorxivClusteringP2P": 38.03, - "BiorxivClusteringS2S": 36.53, - "MedrxivClusteringP2P": 32.7, - "MedrxivClusteringS2S": 31.27, - "RedditClustering": 67.84, - "RedditClusteringP2P": 67.96, - "StackExchangeClustering": 76.26, - "StackExchangeClusteringP2P": 36.88, - "TwentyNewsgroupsClustering": 58.14 + "Model": "sbert_large_mt_nlu_ru", + "GeoreviewClusteringP2P": 57.07, + "RuSciBenchGRNTIClusteringP2P": 51.44, + "RuSciBenchOECDClusteringP2P": 45.36 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-embedding-3-large", - "SprintDuplicateQuestions": 92.25, - "TwitterSemEval2015": 77.13, - "TwitterURLCorpus": 87.78 + "Model": "sbert_large_mt_nlu_ru", + "TERRa": 51.97 + }, + { + "Model": "sbert_large_mt_nlu_ru", + "TERRa": 51.97 } ] }, "Reranking": { "map": [ { - "Model": "text-embedding-3-large", - "AskUbuntuDupQuestions": 65.03, - "MindSmallReranking": 29.86, - "SciDocsRR": 86.66, - "StackOverflowDupQuestions": 55.08 + "Model": "sbert_large_mt_nlu_ru", + "MIRACLReranking (ru)": 24.99 + }, + { + "Model": "sbert_large_mt_nlu_ru", + "RuBQReranking": 56.14 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-3-large", - "AILACasedocs": 39.0, - "AILAStatutes": 41.31, - "ARCChallenge": 23.98, - "AlphaNLI": 37.27, - "ArguAna": 58.05, - "BrightRetrieval (theoremqa_questions)": 22.22, - "BrightRetrieval (leetcode)": 23.65, - "BrightRetrieval (earth_science)": 26.27, - "BrightRetrieval (psychology)": 27.52, - "BrightRetrieval (robotics)": 12.93, - "BrightRetrieval (economics)": 19.98, - "BrightRetrieval (stackoverflow)": 12.49, - "BrightRetrieval (biology)": 23.67, - "BrightRetrieval (theoremqa_theorems)": 10.82, - "BrightRetrieval (pony)": 2.45, - "BrightRetrieval (sustainable_living)": 20.32, - "BrightRetrieval (aops)": 8.45, - "CQADupstackRetrieval": 47.54, - "ClimateFEVER": 30.27, - "DBPedia": 44.76, - "FEVER": 87.94, - "FiQA2018": 55.0, - "GerDaLIRSmall": 32.77, - "HellaSwag": 34.12, - "HotpotQA": 71.58, - "LEMBNarrativeQARetrieval": 44.09, - "LEMBQMSumRetrieval": 32.49, - "LEMBSummScreenFDRetrieval": 84.8, - "LEMBWikimQARetrieval": 54.16, - "LeCaRDv2": 57.2, - "LegalBenchConsumerContractsQA": 79.39, - "LegalBenchCorporateLobbying": 95.09, - "LegalQuAD": 57.47, - "LegalSummarization": 71.55, - "MSMARCO": 40.24, - "NFCorpus": 42.07, - "NQ": 61.27, - "PIQA": 41.96, - "Quail": 10.15, - "QuoraRetrieval": 89.05, - "RARbCode": 89.64, - "RARbMath": 90.08, - "SCIDOCS": 23.11, - "SIQA": 3.44, - "SciFact": 77.77, - "SpartQA": 7.51, - "TRECCOVID": 79.56, - "TempReasonL1": 2.13, - "TempReasonL2Fact": 28.65, - "TempReasonL2Pure": 10.34, - "TempReasonL3Fact": 25.52, - "TempReasonL3Pure": 15.28, - "Touche2020": 23.35, - "WinoGrande": 29.11 - }, - { - "Model": "text-embedding-3-large", - "LEMBNeedleRetrieval": 29.25, - "LEMBPasskeyRetrieval": 63.0 - } - ], - "recall_at_1": [ - { - "Model": "text-embedding-3-large", - "BrightRetrieval (earth_science)": 32.26, - "BrightRetrieval (sustainable_living)": 26.34, - "BrightRetrieval (economics)": 24.76, - "BrightRetrieval (stackoverflow)": 11.54, - "BrightRetrieval (pony)": 0.0, - "BrightRetrieval (biology)": 33.09, - "BrightRetrieval (robotics)": 11.88, - "BrightRetrieval (psychology)": 35.15 + "Model": "sbert_large_mt_nlu_ru", + "MIRACLRetrieval (ru)": 6.2, + "RiaNewsRetrieval": 21.4, + "RuBQRetrieval": 29.8 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text-embedding-3-large", - "BIOSSES": 84.68, - "SICK-R": 79.0, - "STS12": 72.84, - "STS13": 86.1, - "STS14": 81.15, - "STS15": 88.49, - "STS16": 85.08, - "STS17 (en-en)": 90.22, - "STS22 (en)": 66.14, - "STSBenchmark": 83.56 + "Model": "sbert_large_mt_nlu_ru", + "RUParaPhraserSTS": 65.17, + "RuSTSBenchmarkSTS": 71.22, + "STS22 (ru)": 56.82 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "text-embedding-3-large", - "SummEval": 29.92 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ + "accuracy": [ { - "Model": "text-embedding-3-large", - "Core17InstructionRetrieval": -0.2, - "News21InstructionRetrieval": -2.03, - "Robust04InstructionRetrieval": -5.81 + "Model": "sbert_large_mt_nlu_ru", + "CEDRClassification": 36.81, + "SensitiveTopicsClassification": 28.54 } ] + }, + "InstructionRetrieval": { + "p-MRR": [] } }, - "rubert-tiny": { + "sbert_large_nlu_ru": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "rubert-tiny", - "GeoreviewClassification (rus-Cyrl)": 33.45, - "HeadlineClassification (rus-Cyrl)": 57.65, - "InappropriatenessClassification (rus-Cyrl)": 54.5, - "KinopoiskClassification (rus-Cyrl)": 41.36, - "MassiveIntentClassification (rus-Cyrl)": 50.1, - "MassiveScenarioClassification (rus-Cyrl)": 52.15, - "RuReviewsClassification (rus-Cyrl)": 49.56, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 35.71, - "RuSciBenchOECDClassification (rus-Cyrl)": 26.51 + "Model": "sbert_large_nlu_ru", + "GeoreviewClassification": 39.97, + "HeadlineClassification": 79.26, + "InappropriatenessClassification": 62.52, + "KinopoiskClassification": 49.51, + "MassiveIntentClassification (ru)": 61.09, + "MassiveScenarioClassification (ru)": 67.6, + "RuReviewsClassification": 58.27, + "RuSciBenchGRNTIClassification": 53.9, + "RuSciBenchOECDClassification": 43.04 } ] }, "Clustering": { "v_measure": [ { - "Model": "rubert-tiny", - "GeoreviewClusteringP2P (rus-Cyrl)": 34.4, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 29.89, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 27.98 + "Model": "sbert_large_nlu_ru", + "GeoreviewClusteringP2P": 57.12, + "RuSciBenchGRNTIClusteringP2P": 49.7, + "RuSciBenchOECDClusteringP2P": 44.48 } ] }, "PairClassification": { "max_ap": [ { - "Model": "rubert-tiny", - "TERRa (rus-Cyrl)": 51.06 + "Model": "sbert_large_nlu_ru", + "TERRa": 50.17 }, { - "Model": "rubert-tiny", - "TERRa (rus-Cyrl)": 52.85 + "Model": "sbert_large_nlu_ru", + "TERRa": 50.17 } ] }, "Reranking": { "map": [ { - "Model": "rubert-tiny", - "RuBQReranking (rus-Cyrl)": 35.44 + "Model": "sbert_large_nlu_ru", + "MIRACLReranking (ru)": 18.8 + }, + { + "Model": "sbert_large_nlu_ru", + "RuBQReranking": 46.81 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "rubert-tiny", - "RiaNewsRetrieval (rus-Cyrl)": 0.79, - "RuBQRetrieval (rus-Cyrl)": 3.24 + "Model": "sbert_large_nlu_ru", + "MIRACLRetrieval (ru)": 1.98, + "RiaNewsRetrieval": 11.11, + "RuBQRetrieval": 12.45 } ] }, "STS": { "cosine_spearman": [ { - "Model": "rubert-tiny", - "RUParaPhraserSTS (rus-Cyrl)": 53.41, - "RuSTSBenchmarkSTS (rus-Cyrl)": 58.16, - "STS22 (rus-Cyrl)": 47.88 + "Model": "sbert_large_nlu_ru", + "RUParaPhraserSTS": 62.06, + "RuSTSBenchmarkSTS": 58.82, + "STS22 (ru)": 50.75 } ] }, @@ -17657,9 +17923,9 @@ "MultilabelClassification": { "accuracy": [ { - "Model": "rubert-tiny", - "CEDRClassification (rus-Cyrl)": 37.39, - "SensitiveTopicsClassification (rus-Cyrl)": 18.54 + "Model": "sbert_large_nlu_ru", + "CEDRClassification": 35.84, + "SensitiveTopicsClassification": 27.9 } ] }, @@ -17667,87 +17933,139 @@ "p-MRR": [] } }, - "bert-base-multilingual-cased": { + "sentence-bert-swedish-cased": { + "BitextMining": { + "f1": [ + { + "Model": "sentence-bert-swedish-cased", + "BornholmBitextMining": 14.08 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "sentence-bert-swedish-cased", + "AngryTweetsClassification": 44.46, + "DKHateClassification": 59.36, + "DanishPoliticalCommentsClassification": 28.32, + "LccSentimentClassification": 47.2, + "MassiveIntentClassification": 69.11, + "MassiveScenarioClassification": 75.96, + "NoRecClassification": 43.53, + "NordicLangClassification": 51.45, + "NorwegianParliament": 55.74, + "ScalaDaClassification": 50.12, + "ScalaNbClassification": 50.34 + } + ] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "sentence-camembert-base": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "bert-base-multilingual-cased", - "AmazonReviewsClassification (fr)": 29.39, - "MTOPDomainClassification (fr)": 63.61, - "MTOPIntentClassification (fr)": 37.84, - "MasakhaNEWSClassification (fra)": 64.0, - "MassiveIntentClassification (fr)": 37.3, - "MassiveScenarioClassification (fr)": 44.47 + "Model": "sentence-camembert-base", + "AmazonReviewsClassification": 36.03, + "MTOPDomainClassification": 77.1, + "MTOPIntentClassification": 43.44, + "MasakhaNEWSClassification": 70.36, + "MassiveIntentClassification": 51.59, + "MassiveScenarioClassification": 61.28 } ] }, "Clustering": { "v_measure": [ { - "Model": "bert-base-multilingual-cased", - "AlloProfClusteringP2P": 51.5, - "AlloProfClusteringS2S": 43.06, - "HALClusteringS2S": 20.81, - "MLSUMClusteringP2P": 40.9, - "MLSUMClusteringS2S": 31.8, - "MasakhaNEWSClusteringP2P (fra)": 24.23, - "MasakhaNEWSClusteringS2S (fra)": 24.46 + "Model": "sentence-camembert-base", + "AlloProfClusteringP2P": 59.09, + "AlloProfClusteringS2S": 38.92, + "HALClusteringS2S": 20.22, + "MLSUMClusteringP2P": 35.98, + "MLSUMClusteringS2S": 27.05, + "MasakhaNEWSClusteringP2P": 36.03, + "MasakhaNEWSClusteringS2S": 30.77 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bert-base-multilingual-cased", - "OpusparcusPC (fr)": 86.77, - "PawsXPairClassification (fr)": 53.39 + "Model": "sentence-camembert-base", + "OpusparcusPC": 92.05, + "PawsXPairClassification": 57.44 }, { - "Model": "bert-base-multilingual-cased", - "OpusparcusPC (fr)": 87.76, - "PawsXPairClassification (fr)": 53.41 + "Model": "sentence-camembert-base", + "OpusparcusPC": 92.05, + "PawsXPairClassification": 57.44 } ] }, "Reranking": { "map": [ { - "Model": "bert-base-multilingual-cased", - "AlloprofReranking": 36.23, - "SyntecReranking": 53.25 + "Model": "sentence-camembert-base", + "AlloprofReranking": 48.68, + "SyntecReranking": 79.75 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bert-base-multilingual-cased", - "AlloprofRetrieval": 1.63, + "Model": "sentence-camembert-base", + "AlloprofRetrieval": 21.94, "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 3.55, - "SyntecRetrieval": 18.95, - "XPQARetrieval (fr)": 18.49 + "MintakaRetrieval": 13.36, + "SyntecRetrieval": 68.62, + "XPQARetrieval": 57.92 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bert-base-multilingual-cased", - "SICKFr": 58.75, - "STS22 (fr)": 39.05, - "STSBenchmarkMultilingualSTS (fr)": 52.25 + "Model": "sentence-camembert-base", + "SICKFr": 74.18, + "STS22": 77.54, + "STSBenchmarkMultilingualSTS": 81.64 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "bert-base-multilingual-cased", - "SummEvalFr": 28.81 + "Model": "sentence-camembert-base", + "SummEvalFr": 28.77 } ] }, @@ -17758,118 +18076,87 @@ "p-MRR": [] } }, - "gtr-t5-xxl": { + "sentence-camembert-large": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "gtr-t5-xxl", - "AmazonCounterfactualClassification (en)": 67.3, - "AmazonPolarityClassification": 75.05, - "AmazonReviewsClassification (en)": 37.3, - "Banking77Classification": 82.32, - "EmotionClassification": 43.19, - "ImdbClassification": 70.8, - "MTOPDomainClassification (en)": 93.84, - "MTOPIntentClassification (en)": 67.71, - "MassiveIntentClassification (en)": 70.61, - "MassiveScenarioClassification (en)": 77.77, - "ToxicConversationsClassification": 68.48, - "TweetSentimentExtractionClassification": 54.54 + "Model": "sentence-camembert-large", + "AmazonReviewsClassification": 37.97, + "MTOPDomainClassification": 85.74, + "MTOPIntentClassification": 58.62, + "MasakhaNEWSClassification": 80.62, + "MassiveIntentClassification": 62.65, + "MassiveScenarioClassification": 69.29 } ] }, "Clustering": { "v_measure": [ { - "Model": "gtr-t5-xxl", - "ArxivClusteringP2P": 37.9, - "ArxivClusteringS2S": 32.39, - "BiorxivClusteringP2P": 30.48, - "BiorxivClusteringS2S": 27.5, - "MedrxivClusteringP2P": 29.12, - "MedrxivClusteringS2S": 27.56, - "RedditClustering": 64.13, - "RedditClusteringP2P": 62.84, - "StackExchangeClustering": 71.43, - "StackExchangeClusteringP2P": 32.85, - "TwentyNewsgroupsClustering": 50.44 + "Model": "sentence-camembert-large", + "AlloProfClusteringP2P": 62.69, + "AlloProfClusteringS2S": 42.06, + "HALClusteringS2S": 23.9, + "MLSUMClusteringP2P": 42.04, + "MLSUMClusteringS2S": 32.29, + "MasakhaNEWSClusteringP2P": 54.51, + "MasakhaNEWSClusteringS2S": 44.73 } ] }, "PairClassification": { "max_ap": [ { - "Model": "gtr-t5-xxl", - "SprintDuplicateQuestions": 95.68, - "TwitterSemEval2015": 77.54, - "TwitterURLCorpus": 85.13 + "Model": "sentence-camembert-large", + "OpusparcusPC": 94.63, + "PawsXPairClassification": 59.59 }, { - "Model": "gtr-t5-xxl", - "SprintDuplicateQuestions": 95.68, - "TwitterSemEval2015": 77.54, - "TwitterURLCorpus": 85.13 + "Model": "sentence-camembert-large", + "OpusparcusPC": 94.63, + "PawsXPairClassification": 59.61 } ] }, "Reranking": { "map": [ { - "Model": "gtr-t5-xxl", - "AskUbuntuDupQuestions": 63.23, - "MindSmallReranking": 31.93, - "SciDocsRR": 77.96, - "StackOverflowDupQuestions": 53.5 + "Model": "sentence-camembert-large", + "AlloprofReranking": 57.62, + "SyntecReranking": 88.15 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "gtr-t5-xxl", - "ArguAna": 53.77, - "CQADupstackRetrieval": 38.56, - "ClimateFEVER": 27.21, - "DBPedia": 41.28, - "FEVER": 74.08, - "FiQA2018": 46.78, - "HotpotQA": 59.67, - "MSMARCO": 44.05, - "NFCorpus": 34.18, - "NQ": 57.24, - "QuoraRetrieval": 89.09, - "SCIDOCS": 15.88, - "SciFact": 66.77, - "TRECCOVID": 51.9, - "Touche2020": 26.76 + "Model": "sentence-camembert-large", + "AlloprofRetrieval": 31.62, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 21.87, + "SyntecRetrieval": 81.11, + "XPQARetrieval": 65.62 } ] }, "STS": { "cosine_spearman": [ { - "Model": "gtr-t5-xxl", - "BIOSSES": 81.91, - "SICK-R": 74.29, - "STS12": 70.12, - "STS13": 82.72, - "STS14": 78.24, - "STS15": 86.26, - "STS16": 81.61, - "STS17 (en-en)": 85.18, - "STS22 (en)": 65.76, - "STSBenchmark": 77.73 + "Model": "sentence-camembert-large", + "SICKFr": 77.7, + "STS22": 81.73, + "STSBenchmarkMultilingualSTS": 85.79 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "gtr-t5-xxl", - "SummEval": 30.64 + "Model": "sentence-camembert-large", + "SummEvalFr": 30.88 } ] }, @@ -17880,87 +18167,87 @@ "p-MRR": [] } }, - "sentence-camembert-base": { + "sentence-croissant-llm-base": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "sentence-camembert-base", - "AmazonReviewsClassification (fr)": 36.03, - "MTOPDomainClassification (fr)": 77.1, - "MTOPIntentClassification (fr)": 43.44, - "MasakhaNEWSClassification (fra)": 70.36, - "MassiveIntentClassification (fr)": 51.59, - "MassiveScenarioClassification (fr)": 61.28 + "Model": "sentence-croissant-llm-base", + "AmazonReviewsClassification": 34.79, + "MTOPDomainClassification": 85.52, + "MTOPIntentClassification": 63.12, + "MasakhaNEWSClassification": 79.29, + "MassiveIntentClassification": 59.41, + "MassiveScenarioClassification": 65.29 } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-camembert-base", - "AlloProfClusteringP2P": 59.09, - "AlloProfClusteringS2S": 38.92, - "HALClusteringS2S": 20.22, - "MLSUMClusteringP2P": 35.98, - "MLSUMClusteringS2S": 27.05, - "MasakhaNEWSClusteringP2P (fra)": 36.03, - "MasakhaNEWSClusteringS2S (fra)": 30.77 + "Model": "sentence-croissant-llm-base", + "AlloProfClusteringP2P": 64.12, + "AlloProfClusteringS2S": 32.52, + "HALClusteringS2S": 23.4, + "MLSUMClusteringP2P": 42.94, + "MLSUMClusteringS2S": 33.91, + "MasakhaNEWSClusteringP2P": 53.94, + "MasakhaNEWSClusteringS2S": 41.05 } ] }, "PairClassification": { "max_ap": [ { - "Model": "sentence-camembert-base", - "OpusparcusPC (fr)": 92.05, - "PawsXPairClassification (fr)": 57.44 + "Model": "sentence-croissant-llm-base", + "OpusparcusPC": 91.42, + "PawsXPairClassification": 63.13 }, { - "Model": "sentence-camembert-base", - "OpusparcusPC (fr)": 92.05, - "PawsXPairClassification (fr)": 57.44 + "Model": "sentence-croissant-llm-base", + "OpusparcusPC": 91.42, + "PawsXPairClassification": 63.19 } ] }, "Reranking": { "map": [ { - "Model": "sentence-camembert-base", - "AlloprofReranking": 48.68, - "SyntecReranking": 79.75 + "Model": "sentence-croissant-llm-base", + "AlloprofReranking": 53.0, + "SyntecReranking": 82.9 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-camembert-base", - "AlloprofRetrieval": 21.94, + "Model": "sentence-croissant-llm-base", + "AlloprofRetrieval": 29.97, "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 13.36, - "SyntecRetrieval": 68.62, - "XPQARetrieval (fr)": 57.92 + "MintakaRetrieval": 21.31, + "SyntecRetrieval": 74.2, + "XPQARetrieval": 58.57 } ] }, "STS": { "cosine_spearman": [ { - "Model": "sentence-camembert-base", - "SICKFr": 74.18, - "STS22 (fr)": 77.54, - "STSBenchmarkMultilingualSTS (fr)": 81.64 + "Model": "sentence-croissant-llm-base", + "SICKFr": 69.6, + "STS22": 78.77, + "STSBenchmarkMultilingualSTS": 79.23 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "sentence-camembert-base", - "SummEvalFr": 28.77 + "Model": "sentence-croissant-llm-base", + "SummEvalFr": 29.04 } ] }, @@ -17971,95 +18258,147 @@ "p-MRR": [] } }, - "m3e-base": { + "sentence-t5-base": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "m3e-base", - "AmazonReviewsClassification (zh)": 43.02, - "IFlyTek": 44.42, - "JDReview": 85.33, - "MassiveIntentClassification (zh-CN)": 68.4, - "MassiveScenarioClassification (zh-CN)": 74.6, - "MultilingualSentiment": 71.9, - "OnlineShopping": 87.77, - "TNews": 48.28, - "Waimai": 83.99 + "Model": "sentence-t5-base", + "AmazonCounterfactualClassification": 46.05, + "AmazonPolarityClassification": 85.12, + "AmazonReviewsClassification": 21.53, + "Banking77Classification": 76.48, + "EmotionClassification": 51.35, + "ImdbClassification": 77.34, + "MTOPDomainClassification": 16.21, + "MTOPIntentClassification": 5.21, + "MasakhaNEWSClassification": 81.21, + "MassiveIntentClassification": 4.63, + "MassiveScenarioClassification": 7.91, + "ToxicConversationsClassification": 68.2, + "TweetSentimentExtractionClassification": 62.71 } ] }, "Clustering": { "v_measure": [ { - "Model": "m3e-base", - "CLSClusteringP2P": 39.81, - "CLSClusteringS2S": 37.34, - "ThuNewsClusteringP2P": 59.77, - "ThuNewsClusteringS2S": 53.78 + "Model": "sentence-t5-base", + "AlloProfClusteringP2P": 58.44, + "AlloProfClusteringS2S": 35.93, + "ArxivClusteringP2P": 39.28, + "ArxivClusteringS2S": 27.26, + "BiorxivClusteringP2P": 33.99, + "BiorxivClusteringS2S": 22.92, + "BlurbsClusteringP2P": 30.59, + "BlurbsClusteringS2S": 11.57, + "HALClusteringS2S": 17.72, + "MLSUMClusteringP2P": 40.77, + "MLSUMClusteringS2S": 30.06, + "MasakhaNEWSClusteringP2P": 61.9, + "MasakhaNEWSClusteringS2S": 35.64, + "MedrxivClusteringP2P": 33.2, + "MedrxivClusteringS2S": 26.13, + "RedditClustering": 52.93, + "RedditClusteringP2P": 59.67, + "StackExchangeClustering": 63.13, + "StackExchangeClusteringP2P": 35.68, + "TenKGnadClusteringP2P": 44.88, + "TenKGnadClusteringS2S": 18.11, + "TwentyNewsgroupsClustering": 48.1 } ] }, "PairClassification": { "max_ap": [ { - "Model": "m3e-base", - "Cmnli": 69.98, - "Ocnli": 58.0 + "Model": "sentence-t5-base", + "OpusparcusPC": 89.4, + "PawsXPairClassification": 55.35, + "SprintDuplicateQuestions": 91.23, + "TwitterSemEval2015": 78.25, + "TwitterURLCorpus": 86.05 }, { - "Model": "m3e-base", - "Cmnli": 70.0, - "Ocnli": 58.05 + "Model": "sentence-t5-base", + "OpusparcusPC": 89.41, + "PawsXPairClassification": 55.43, + "SprintDuplicateQuestions": 91.23, + "TwitterSemEval2015": 78.25, + "TwitterURLCorpus": 86.05 } ] }, "Reranking": { "map": [ { - "Model": "m3e-base", - "CMedQAv1": 77.05, - "CMedQAv2": 76.76, - "MMarcoReranking": 17.51, - "T2Reranking": 66.03 + "Model": "sentence-t5-base", + "AlloprofReranking": 50.12, + "AskUbuntuDupQuestions": 59.73, + "MindSmallReranking": 30.2, + "SciDocsRR": 73.96, + "StackOverflowDupQuestions": 48.46, + "SyntecReranking": 78.05 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "m3e-base", - "CmedqaRetrieval": 30.33, - "CovidRetrieval": 66.42, - "DuRetrieval": 75.76, - "EcomRetrieval": 50.27, - "MMarcoRetrieval": 65.46, - "MedicalRetrieval": 42.79, - "T2Retrieval": 73.14, - "VideoRetrieval": 51.11 + "Model": "sentence-t5-base", + "AlloprofRetrieval": 27.52, + "ArguAna": 44.85, + "BSARDRetrieval": 0.16, + "CQADupstackRetrieval": 35.23, + "ClimateFEVER": 10.37, + "DBPedia": 27.77, + "FEVER": 26.17, + "FiQA2018": 34.83, + "HotpotQA": 33.2, + "MSMARCO": 20.7, + "MintakaRetrieval": 21.04, + "NFCorpus": 28.65, + "NQ": 36.32, + "QuoraRetrieval": 85.49, + "SCIDOCS": 14.15, + "SciFact": 45.76, + "SyntecRetrieval": 67.0, + "TRECCOVID": 40.7, + "Touche2020": 20.3, + "XPQARetrieval": 45.19 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "sentence-t5-base", + "BIOSSES": 75.89, + "SICK-R": 80.18, + "SICKFr": 71.74, + "STS12": 78.05, + "STS13": 85.85, + "STS14": 82.19, + "STS15": 87.46, + "STS16": 84.03, + "STS17": 36.46, + "STS22": 20.15, + "STSBenchmark": 85.52, + "STSBenchmarkMultilingualSTS": 74.04 } ] }, - "STS": { + "Summarization": { "cosine_spearman": [ { - "Model": "m3e-base", - "AFQMC": 35.87, - "ATEC": 41.27, - "BQ": 63.81, - "LCQMC": 74.88, - "PAWSX": 12.19, - "QBQTC": 32.07, - "STS22 (zh)": 66.73, - "STSB": 76.97 + "Model": "sentence-t5-base", + "SummEval": 31.39, + "SummEvalFr": 30.01 } ] }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, @@ -18067,1035 +18406,448 @@ "p-MRR": [] } }, - "text-embedding-3-small": { + "sentence-t5-large": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "sentence-t5-large", + "BUCC": 0.95, + "Tatoeba": 14.67 + } + ] }, "Classification": { "accuracy": [ { - "Model": "text-embedding-3-small", - "AmazonCounterfactualClassification (en)": 76.42, - "AmazonPolarityClassification": 90.84, - "AmazonReviewsClassification (en)": 45.73, - "Banking77Classification": 83.01, - "EmotionClassification": 50.63, - "ImdbClassification": 83.66, - "MTOPDomainClassification (en)": 93.91, - "MTOPIntentClassification (en)": 70.98, - "MassiveIntentClassification (en)": 72.86, - "MassiveScenarioClassification (en)": 76.84, - "ToxicConversationsClassification": 71.91, - "TweetSentimentExtractionClassification": 61.72 + "Model": "sentence-t5-large", + "AmazonCounterfactualClassification": 45.72, + "AmazonPolarityClassification": 92.87, + "AmazonReviewsClassification": 22.12, + "Banking77Classification": 78.46, + "EmotionClassification": 51.74, + "ImdbClassification": 87.01, + "MTOPDomainClassification": 15.82, + "MTOPIntentClassification": 4.96, + "MasakhaNEWSClassification": 80.43, + "MassiveIntentClassification": 3.49, + "MassiveScenarioClassification": 7.24, + "ToxicConversationsClassification": 71.73, + "TweetSentimentExtractionClassification": 62.33 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-embedding-3-small", - "ArxivClusteringP2P": 46.57, - "ArxivClusteringS2S": 39.35, - "BiorxivClusteringP2P": 37.77, - "BiorxivClusteringS2S": 34.68, - "MedrxivClusteringP2P": 32.77, - "MedrxivClusteringS2S": 31.85, - "RedditClustering": 64.09, - "RedditClusteringP2P": 65.12, - "StackExchangeClustering": 72.05, - "StackExchangeClusteringP2P": 34.04, - "TwentyNewsgroupsClustering": 54.81 + "Model": "sentence-t5-large", + "AlloProfClusteringP2P": 61.82, + "AlloProfClusteringS2S": 39.78, + "ArxivClusteringP2P": 41.62, + "ArxivClusteringS2S": 29.44, + "BiorxivClusteringP2P": 35.99, + "BiorxivClusteringS2S": 24.02, + "BlurbsClusteringP2P": 35.33, + "BlurbsClusteringS2S": 13.27, + "HALClusteringS2S": 18.73, + "MLSUMClusteringP2P": 42.07, + "MLSUMClusteringS2S": 31.87, + "MasakhaNEWSClusteringP2P": 58.6, + "MasakhaNEWSClusteringS2S": 31.33, + "MedrxivClusteringP2P": 32.4, + "MedrxivClusteringS2S": 26.33, + "RedditClustering": 54.53, + "RedditClusteringP2P": 62.5, + "StackExchangeClustering": 65.11, + "StackExchangeClusteringP2P": 36.86, + "TenKGnadClusteringP2P": 44.11, + "TenKGnadClusteringS2S": 17.26, + "TwentyNewsgroupsClustering": 49.33 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text-embedding-3-small", - "OpusparcusPC (fr)": 94.45, - "SprintDuplicateQuestions": 94.58, - "TwitterSemEval2015": 73.33, - "TwitterURLCorpus": 87.21 + "Model": "sentence-t5-large", + "OpusparcusPC": 91.19, + "PawsXPairClassification": 59.59, + "SprintDuplicateQuestions": 89.01, + "TwitterSemEval2015": 79.75, + "TwitterURLCorpus": 86.14 }, { - "Model": "text-embedding-3-small", - "OpusparcusPC (fr)": 94.45 + "Model": "sentence-t5-large", + "OpusparcusPC": 91.19, + "PawsXPairClassification": 59.69, + "SprintDuplicateQuestions": 89.02, + "TwitterSemEval2015": 79.75, + "TwitterURLCorpus": 86.14 } ] }, "Reranking": { "map": [ { - "Model": "text-embedding-3-small", - "AskUbuntuDupQuestions": 62.18, - "MindSmallReranking": 29.93, - "SciDocsRR": 83.25, - "StackOverflowDupQuestions": 51.53 + "Model": "sentence-t5-large", + "AlloprofReranking": 57.99, + "AskUbuntuDupQuestions": 61.51, + "MindSmallReranking": 30.27, + "SciDocsRR": 74.88, + "StackOverflowDupQuestions": 49.34, + "SyntecReranking": 79.77 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-3-small", - "ARCChallenge": 14.63, - "AlphaNLI": 30.61, - "ArguAna": 55.49, - "CQADupstackRetrieval": 42.58, - "ClimateFEVER": 26.86, - "DBPedia": 39.97, - "FEVER": 79.42, - "FiQA2018": 44.91, - "HellaSwag": 30.94, - "HotpotQA": 63.63, - "MSMARCO": 37.02, - "NFCorpus": 38.33, - "NQ": 52.86, - "PIQA": 33.69, - "Quail": 6.11, - "QuoraRetrieval": 88.83, - "RARbCode": 72.03, - "RARbMath": 71.07, - "SCIDOCS": 20.8, - "SIQA": 3.03, - "SciFact": 73.37, - "SpartQA": 6.63, - "TRECCOVID": 77.9, - "TempReasonL1": 2.35, - "TempReasonL2Fact": 25.68, - "TempReasonL2Pure": 2.76, - "TempReasonL3Fact": 22.09, - "TempReasonL3Pure": 9.79, - "Touche2020": 24.28, - "WinoGrande": 31.53 + "Model": "sentence-t5-large", + "AlloprofRetrieval": 34.52, + "ArguAna": 39.27, + "BSARDRetrieval": 0.0, + "CQADupstackRetrieval": 38.96, + "ClimateFEVER": 11.36, + "DBPedia": 31.55, + "FEVER": 36.21, + "FiQA2018": 43.55, + "HotpotQA": 33.95, + "MSMARCO": 23.96, + "MintakaRetrieval": 23.92, + "NFCorpus": 31.1, + "NQ": 42.02, + "QuoraRetrieval": 85.73, + "SCIDOCS": 15.38, + "SciFact": 49.91, + "SyntecRetrieval": 71.05, + "TRECCOVID": 46.11, + "Touche2020": 21.63, + "XPQARetrieval": 48.79 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text-embedding-3-small", - "BIOSSES": 88.72, - "SICK-R": 76.73, - "STS12": 73.09, - "STS13": 84.92, - "STS14": 79.81, - "STS15": 88.01, - "STS16": 84.41, - "STS17 (en-en)": 90.94, - "STS22 (en)": 64.96, - "STSBenchmark": 84.24 + "Model": "sentence-t5-large", + "BIOSSES": 78.93, + "SICK-R": 80.34, + "SICKFr": 72.83, + "STS12": 79.11, + "STS13": 87.33, + "STS14": 83.17, + "STS15": 88.28, + "STS16": 84.36, + "STS17": 45.95, + "STS22": 23.1, + "STSBenchmark": 85.36, + "STSBenchmarkMultilingualSTS": 77.59 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "text-embedding-3-small", - "SummEval": 31.12 - } - ] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "dragon-plus-instruct": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "dragon-plus-instruct", - "ARCChallenge": 8.24, - "AlphaNLI": 25.18, - "HellaSwag": 24.06, - "PIQA": 26.35, - "Quail": 4.2, - "RARbCode": 12.84, - "RARbMath": 36.15, - "SIQA": 1.75, - "SpartQA": 10.82, - "TempReasonL1": 1.54, - "TempReasonL2Fact": 16.11, - "TempReasonL2Pure": 0.57, - "TempReasonL3Fact": 14.81, - "TempReasonL3Pure": 7.46, - "WinoGrande": 60.84 + "Model": "sentence-t5-large", + "SummEval": 29.64, + "SummEvalFr": 30.23 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] - } - }, - "e5-mistral-7b-instruct": { - "BitextMining": { - "f1": [ - { - "Model": "e5-mistral-7b-instruct", - "BornholmBitextMining (dan-Latn)": 57.24, - "Tatoeba (arq-Arab_eng-Latn)": 48.55, - "Tatoeba (uig-Arab_eng-Latn)": 38.83, - "Tatoeba (csb-Latn_eng-Latn)": 62.92, - "Tatoeba (ces-Latn_eng-Latn)": 94.93, - "Tatoeba (urd-Arab_eng-Latn)": 83.62, - "Tatoeba (heb-Hebr_eng-Latn)": 82.82, - "Tatoeba (gsw-Latn_eng-Latn)": 52.21, - "Tatoeba (dan-Latn_eng-Latn)": 93.92, - "Tatoeba (zsm-Latn_eng-Latn)": 94.23, - "Tatoeba (kab-Latn_eng-Latn)": 3.81, - "Tatoeba (bre-Latn_eng-Latn)": 14.2, - "Tatoeba (spa-Latn_eng-Latn)": 98.8, - "Tatoeba (nld-Latn_eng-Latn)": 96.4, - "Tatoeba (ber-Tfng_eng-Latn)": 7.62, - "Tatoeba (cym-Latn_eng-Latn)": 72.32, - "Tatoeba (tel-Telu_eng-Latn)": 42.86, - "Tatoeba (ind-Latn_eng-Latn)": 93.48, - "Tatoeba (cbk-Latn_eng-Latn)": 81.32, - "Tatoeba (mal-Mlym_eng-Latn)": 56.03, - "Tatoeba (bos-Latn_eng-Latn)": 91.54, - "Tatoeba (kor-Hang_eng-Latn)": 91.16, - "Tatoeba (cmn-Hans_eng-Latn)": 95.9, - "Tatoeba (glg-Latn_eng-Latn)": 90.44, - "Tatoeba (nov-Latn_eng-Latn)": 72.4, - "Tatoeba (est-Latn_eng-Latn)": 62.93, - "Tatoeba (max-Deva_eng-Latn)": 64.8, - "Tatoeba (ile-Latn_eng-Latn)": 83.97, - "Tatoeba (dtp-Latn_eng-Latn)": 11.74, - "Tatoeba (mon-Cyrl_eng-Latn)": 37.79, - "Tatoeba (jav-Latn_eng-Latn)": 38.38, - "Tatoeba (xho-Latn_eng-Latn)": 35.94, - "Tatoeba (yid-Hebr_eng-Latn)": 33.12, - "Tatoeba (ang-Latn_eng-Latn)": 81.59, - "Tatoeba (pes-Arab_eng-Latn)": 89.67, - "Tatoeba (ceb-Latn_eng-Latn)": 43.15, - "Tatoeba (nob-Latn_eng-Latn)": 96.86, - "Tatoeba (ara-Arab_eng-Latn)": 88.76, - "Tatoeba (pms-Latn_eng-Latn)": 62.3, - "Tatoeba (swe-Latn_eng-Latn)": 92.95, - "Tatoeba (ina-Latn_eng-Latn)": 95.65, - "Tatoeba (kzj-Latn_eng-Latn)": 12.29, - "Tatoeba (por-Latn_eng-Latn)": 94.44, - "Tatoeba (bel-Cyrl_eng-Latn)": 88.09, - "Tatoeba (fao-Latn_eng-Latn)": 70.33, - "Tatoeba (tam-Taml_eng-Latn)": 72.83, - "Tatoeba (tat-Cyrl_eng-Latn)": 36.79, - "Tatoeba (vie-Latn_eng-Latn)": 94.83, - "Tatoeba (kaz-Cyrl_eng-Latn)": 46.88, - "Tatoeba (slv-Latn_eng-Latn)": 86.13, - "Tatoeba (lvs-Latn_eng-Latn)": 66.01, - "Tatoeba (ben-Beng_eng-Latn)": 81.82, - "Tatoeba (nno-Latn_eng-Latn)": 87.24, - "Tatoeba (tzl-Latn_eng-Latn)": 49.51, - "Tatoeba (ron-Latn_eng-Latn)": 93.0, - "Tatoeba (cha-Latn_eng-Latn)": 44.8, - "Tatoeba (mar-Deva_eng-Latn)": 68.99, - "Tatoeba (hin-Deva_eng-Latn)": 95.28, - "Tatoeba (aze-Latn_eng-Latn)": 76.23, - "Tatoeba (mkd-Cyrl_eng-Latn)": 86.9, - "Tatoeba (jpn-Jpan_eng-Latn)": 94.0, - "Tatoeba (wuu-Hans_eng-Latn)": 89.61, - "Tatoeba (gle-Latn_eng-Latn)": 73.71, - "Tatoeba (arz-Arab_eng-Latn)": 70.73, - "Tatoeba (swg-Latn_eng-Latn)": 64.75, - "Tatoeba (rus-Cyrl_eng-Latn)": 93.75, - "Tatoeba (ukr-Cyrl_eng-Latn)": 94.58, - "Tatoeba (ell-Grek_eng-Latn)": 91.46, - "Tatoeba (hun-Latn_eng-Latn)": 89.8, - "Tatoeba (bul-Cyrl_eng-Latn)": 93.98, - "Tatoeba (oci-Latn_eng-Latn)": 62.2, - "Tatoeba (awa-Deva_eng-Latn)": 67.45, - "Tatoeba (uzb-Latn_eng-Latn)": 52.73, - "Tatoeba (kur-Latn_eng-Latn)": 29.93, - "Tatoeba (fra-Latn_eng-Latn)": 95.66, - "Tatoeba (ido-Latn_eng-Latn)": 74.98, - "Tatoeba (dsb-Latn_eng-Latn)": 65.43, - "Tatoeba (hsb-Latn_eng-Latn)": 74.4, - "Tatoeba (swh-Latn_eng-Latn)": 61.6, - "Tatoeba (hye-Armn_eng-Latn)": 63.99, - "Tatoeba (isl-Latn_eng-Latn)": 87.58, - "Tatoeba (pol-Latn_eng-Latn)": 96.47, - "Tatoeba (yue-Hant_eng-Latn)": 89.11, - "Tatoeba (khm-Khmr_eng-Latn)": 37.31, - "Tatoeba (lfn-Latn_eng-Latn)": 73.89, - "Tatoeba (srp-Cyrl_eng-Latn)": 92.54, - "Tatoeba (lat-Latn_eng-Latn)": 87.8, - "Tatoeba (pam-Latn_eng-Latn)": 14.02, - "Tatoeba (ast-Latn_eng-Latn)": 80.94, - "Tatoeba (orv-Cyrl_eng-Latn)": 59.66, - "Tatoeba (cat-Latn_eng-Latn)": 91.89, - "Tatoeba (amh-Ethi_eng-Latn)": 22.05, - "Tatoeba (deu-Latn_eng-Latn)": 99.47, - "Tatoeba (war-Latn_eng-Latn)": 41.97, - "Tatoeba (tha-Thai_eng-Latn)": 93.64, - "Tatoeba (nds-Latn_eng-Latn)": 77.42, - "Tatoeba (gla-Latn_eng-Latn)": 63.53, - "Tatoeba (kat-Geor_eng-Latn)": 59.67, - "Tatoeba (epo-Latn_eng-Latn)": 87.76, - "Tatoeba (cor-Latn_eng-Latn)": 9.19, - "Tatoeba (lit-Latn_eng-Latn)": 69.07, - "Tatoeba (mhr-Cyrl_eng-Latn)": 16.24, - "Tatoeba (slk-Latn_eng-Latn)": 88.84, - "Tatoeba (eus-Latn_eng-Latn)": 40.64, - "Tatoeba (hrv-Latn_eng-Latn)": 93.97, - "Tatoeba (tgl-Latn_eng-Latn)": 93.14, - "Tatoeba (sqi-Latn_eng-Latn)": 67.06, - "Tatoeba (ita-Latn_eng-Latn)": 91.94, - "Tatoeba (tur-Latn_eng-Latn)": 94.13, - "Tatoeba (tuk-Latn_eng-Latn)": 42.01, - "Tatoeba (fin-Latn_eng-Latn)": 90.69, - "Tatoeba (fry-Latn_eng-Latn)": 70.98, - "Tatoeba (afr-Latn_eng-Latn)": 87.32 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "e5-mistral-7b-instruct", - "AllegroReviews (pol-Latn)": 59.78, - "AmazonCounterfactualClassification (en-ext)": 74.6, - "AmazonCounterfactualClassification (en)": 74.57, - "AmazonCounterfactualClassification (deu-Latn)": 71.25, - "AmazonCounterfactualClassification (jpn-Jpan)": 72.33, - "AmazonPolarityClassification": 96.26, - "AmazonReviewsClassification (fr)": 36.71, - "AmazonReviewsClassification (en)": 55.97, - "AmazonReviewsClassification (deu-Latn)": 54.94, - "AmazonReviewsClassification (spa-Latn)": 51.62, - "AmazonReviewsClassification (fra-Latn)": 50.27, - "AmazonReviewsClassification (jpn-Jpan)": 51.32, - "AmazonReviewsClassification (cmn-Hans)": 47.6, - "AngryTweetsClassification (dan-Latn)": 65.01, - "Banking77Classification": 81.41, - "CBD (pol-Latn)": 72.59, - "DanishPoliticalCommentsClassification (dan-Latn)": 34.68, - "EmotionClassification": 58.42, - "GeoreviewClassification (rus-Cyrl)": 56.72, - "HeadlineClassification (rus-Cyrl)": 87.02, - "IFlyTek (cmn-Hans)": 48.65, - "ImdbClassification": 94.73, - "InappropriatenessClassification (rus-Cyrl)": 70.36, - "JDReview (cmn-Hans)": 84.69, - "KinopoiskClassification (rus-Cyrl)": 68.35, - "LccSentimentClassification (dan-Latn)": 64.53, - "MTOPDomainClassification (fr)": 74.8, - "MTOPDomainClassification (en)": 95.33, - "MTOPDomainClassification (deu-Latn)": 90.48, - "MTOPDomainClassification (spa-Latn)": 90.22, - "MTOPDomainClassification (fra-Latn)": 88.49, - "MTOPDomainClassification (hin-Deva)": 86.67, - "MTOPDomainClassification (tha-Thai)": 83.42, - "MTOPIntentClassification (fr)": 53.97, - "MTOPIntentClassification (en)": 78.99, - "MTOPIntentClassification (deu-Latn)": 69.37, - "MTOPIntentClassification (spa-Latn)": 73.45, - "MTOPIntentClassification (fra-Latn)": 70.6, - "MTOPIntentClassification (hin-Deva)": 59.78, - "MTOPIntentClassification (tha-Thai)": 62.24, - "MasakhaNEWSClassification (fra)": 80.59, - "MasakhaNEWSClassification (amh-Ethi)": 53.11, - "MasakhaNEWSClassification (eng)": 85.89, - "MasakhaNEWSClassification (fra-Latn)": 82.94, - "MasakhaNEWSClassification (hau-Latn)": 76.73, - "MasakhaNEWSClassification (ibo-Latn)": 77.49, - "MasakhaNEWSClassification (lin-Latn)": 83.94, - "MasakhaNEWSClassification (lug-Latn)": 73.95, - "MasakhaNEWSClassification (orm-Ethi)": 77.72, - "MasakhaNEWSClassification (pcm-Latn)": 96.03, - "MasakhaNEWSClassification (run-Latn)": 85.81, - "MasakhaNEWSClassification (sna-Latn)": 89.73, - "MasakhaNEWSClassification (som-Latn)": 66.19, - "MasakhaNEWSClassification (swa-Latn)": 79.5, - "MasakhaNEWSClassification (tir-Ethi)": 33.05, - "MasakhaNEWSClassification (xho-Latn)": 87.71, - "MasakhaNEWSClassification (yor-Latn)": 85.55, - "MassiveIntentClassification (fr)": 46.39, - "MassiveIntentClassification (ron-Latn)": 63.81, - "MassiveIntentClassification (ben-Beng)": 58.21, - "MassiveIntentClassification (jpn-Jpan)": 71.72, - "MassiveIntentClassification (kor-Kore)": 69.85, - "MassiveIntentClassification (fin-Latn)": 64.37, - "MassiveIntentClassification (pol-Latn)": 71.03, - "MassiveIntentClassification (fas-Arab)": 69.02, - "MassiveIntentClassification (cym-Latn)": 46.83, - "MassiveIntentClassification (hye-Armn)": 46.34, - "MassiveIntentClassification (lav-Latn)": 49.61, - "MassiveIntentClassification (slv-Latn)": 63.04, - "MassiveIntentClassification (sqi-Latn)": 48.78, - "MassiveIntentClassification (vie-Latn)": 66.52, - "MassiveIntentClassification (isl-Latn)": 52.71, - "MassiveIntentClassification (por-Latn)": 71.63, - "MassiveIntentClassification (hun-Latn)": 64.87, - "MassiveIntentClassification (hin-Deva)": 63.91, - "MassiveIntentClassification (ita-Latn)": 71.35, - "MassiveIntentClassification (khm-Khmr)": 39.57, - "MassiveIntentClassification (spa-Latn)": 70.88, - "MassiveIntentClassification (urd-Arab)": 57.26, - "MassiveIntentClassification (ara-Arab)": 57.67, - "MassiveIntentClassification (fra-Latn)": 71.66, - "MassiveIntentClassification (nob-Latn)": 65.35, - "MassiveIntentClassification (deu-Latn)": 70.52, - "MassiveIntentClassification (aze-Latn)": 58.14, - "MassiveIntentClassification (heb-Hebr)": 62.36, - "MassiveIntentClassification (cmo-Hant)": 65.89, - "MassiveIntentClassification (mon-Cyrl)": 40.83, - "MassiveIntentClassification (amh-Ethi)": 34.74, - "MassiveIntentClassification (nld-Latn)": 71.24, - "MassiveIntentClassification (swa-Latn)": 51.71, - "MassiveIntentClassification (tha-Thai)": 61.29, - "MassiveIntentClassification (mya-Mymr)": 38.8, - "MassiveIntentClassification (cmo-Hans)": 72.46, - "MassiveIntentClassification (kat-Geor)": 45.05, - "MassiveIntentClassification (jav-Latn)": 50.08, - "MassiveIntentClassification (tgl-Latn)": 62.43, - "MassiveIntentClassification (mal-Mlym)": 41.09, - "MassiveIntentClassification (swe-Latn)": 70.41, - "MassiveIntentClassification (tam-Taml)": 44.28, - "MassiveIntentClassification (msa-Latn)": 66.36, - "MassiveIntentClassification (tur-Latn)": 66.9, - "MassiveIntentClassification (ind-Latn)": 68.0, - "MassiveIntentClassification (rus-Cyrl)": 73.74, - "MassiveIntentClassification (tel-Telu)": 45.47, - "MassiveIntentClassification (afr-Latn)": 62.05, - "MassiveIntentClassification (ell-Grek)": 64.04, - "MassiveIntentClassification (dan-Latn)": 68.11, - "MassiveIntentClassification (en)": 77.0, - "MassiveIntentClassification (kan-Knda)": 45.41, - "MassiveScenarioClassification (fr)": 53.86, - "MassiveScenarioClassification (ind-Latn)": 73.78, - "MassiveScenarioClassification (khm-Khmr)": 48.42, - "MassiveScenarioClassification (por-Latn)": 74.21, - "MassiveScenarioClassification (mon-Cyrl)": 48.26, - "MassiveScenarioClassification (ron-Latn)": 68.97, - "MassiveScenarioClassification (rus-Cyrl)": 77.1, - "MassiveScenarioClassification (fin-Latn)": 68.83, - "MassiveScenarioClassification (tgl-Latn)": 70.87, - "MassiveScenarioClassification (cym-Latn)": 58.84, - "MassiveScenarioClassification (hye-Armn)": 53.61, - "MassiveScenarioClassification (amh-Ethi)": 42.25, - "MassiveScenarioClassification (tur-Latn)": 70.73, - "MassiveScenarioClassification (kat-Geor)": 54.37, - "MassiveScenarioClassification (spa-Latn)": 74.94, - "MassiveScenarioClassification (nld-Latn)": 76.48, - "MassiveScenarioClassification (slv-Latn)": 70.59, - "MassiveScenarioClassification (nob-Latn)": 72.48, - "MassiveScenarioClassification (jav-Latn)": 61.38, - "MassiveScenarioClassification (aze-Latn)": 63.77, - "MassiveScenarioClassification (tha-Thai)": 69.67, - "MassiveScenarioClassification (ara-Arab)": 67.15, - "MassiveScenarioClassification (ben-Beng)": 64.73, - "MassiveScenarioClassification (deu-Latn)": 77.22, - "MassiveScenarioClassification (kor-Kore)": 74.41, - "MassiveScenarioClassification (cmo-Hant)": 71.62, - "MassiveScenarioClassification (tel-Telu)": 53.65, - "MassiveScenarioClassification (mal-Mlym)": 49.48, - "MassiveScenarioClassification (tam-Taml)": 52.58, - "MassiveScenarioClassification (hun-Latn)": 70.97, - "MassiveScenarioClassification (ell-Grek)": 69.77, - "MassiveScenarioClassification (cmo-Hans)": 76.4, - "MassiveScenarioClassification (fas-Arab)": 74.61, - "MassiveScenarioClassification (dan-Latn)": 73.77, - "MassiveScenarioClassification (kan-Knda)": 55.85, - "MassiveScenarioClassification (fra-Latn)": 75.65, - "MassiveScenarioClassification (msa-Latn)": 72.85, - "MassiveScenarioClassification (jpn-Jpan)": 75.79, - "MassiveScenarioClassification (hin-Deva)": 69.65, - "MassiveScenarioClassification (mya-Mymr)": 46.32, - "MassiveScenarioClassification (en)": 79.13, - "MassiveScenarioClassification (lav-Latn)": 58.4, - "MassiveScenarioClassification (pol-Latn)": 75.09, - "MassiveScenarioClassification (sqi-Latn)": 58.29, - "MassiveScenarioClassification (urd-Arab)": 64.12, - "MassiveScenarioClassification (isl-Latn)": 63.59, - "MassiveScenarioClassification (swe-Latn)": 76.81, - "MassiveScenarioClassification (heb-Hebr)": 65.48, - "MassiveScenarioClassification (afr-Latn)": 71.0, - "MassiveScenarioClassification (ita-Latn)": 74.88, - "MassiveScenarioClassification (swa-Latn)": 62.31, - "MassiveScenarioClassification (vie-Latn)": 71.92, - "MultilingualSentiment (cmn-Hans)": 73.47, - "NoRecClassification (nob-Latn)": 55.0, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 69.13, - "OnlineShopping (cmn-Hans)": 92.56, - "PAC (pol-Latn)": 62.26, - "PolEmo2.0-IN (pol-Latn)": 85.58, - "PolEmo2.0-OUT (pol-Latn)": 60.55, - "RuReviewsClassification (rus-Cyrl)": 70.57, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 66.05, - "RuSciBenchOECDClassification (rus-Cyrl)": 52.11, - "TNews (cmn-Hans)": 50.58, - "ToxicConversationsClassification": 71.75, - "TweetSentimentExtractionClassification": 64.89, - "Waimai (cmn-Hans)": 87.79 + } + }, + "sentence-t5-xl": { + "BitextMining": { + "f1": [ + { + "Model": "sentence-t5-xl", + "BUCC": 1.3, + "Tatoeba": 35.71 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "sentence-t5-xl", + "AmazonCounterfactualClassification": 45.61, + "AmazonPolarityClassification": 93.17, + "AmazonReviewsClassification": 21.88, + "Banking77Classification": 80.88, + "EmotionClassification": 51.95, + "ImdbClassification": 87.54, + "MTOPDomainClassification": 15.62, + "MTOPIntentClassification": 5.08, + "MasakhaNEWSClassification": 80.09, + "MassiveIntentClassification": 3.24, + "MassiveScenarioClassification": 7.14, + "ToxicConversationsClassification": 70.95, + "TweetSentimentExtractionClassification": 61.21 } ] }, "Clustering": { "v_measure": [ { - "Model": "e5-mistral-7b-instruct", - "AlloProfClusteringP2P": 61.06, - "AlloProfClusteringS2S": 28.12, - "GeoreviewClusteringP2P (rus-Cyrl)": 76.32, - "HALClusteringS2S": 19.69, - "MLSUMClusteringP2P": 45.59, - "MLSUMClusteringS2S": 32.0, - "MasakhaNEWSClusteringP2P (fra)": 52.47, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 47.57, - "MasakhaNEWSClusteringP2P (eng)": 71.97, - "MasakhaNEWSClusteringP2P (fra-Latn)": 81.18, - "MasakhaNEWSClusteringP2P (hau-Latn)": 72.23, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 66.65, - "MasakhaNEWSClusteringP2P (lin-Latn)": 75.85, - "MasakhaNEWSClusteringP2P (lug-Latn)": 53.69, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 52.91, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 92.6, - "MasakhaNEWSClusteringP2P (run-Latn)": 62.47, - "MasakhaNEWSClusteringP2P (sna-Latn)": 82.99, - "MasakhaNEWSClusteringP2P (som-Latn)": 41.28, - "MasakhaNEWSClusteringP2P (swa-Latn)": 50.54, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 46.94, - "MasakhaNEWSClusteringP2P (xho-Latn)": 59.17, - "MasakhaNEWSClusteringP2P (yor-Latn)": 65.86, - "MasakhaNEWSClusteringS2S (fra)": 49.2, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 47.55, - "MasakhaNEWSClusteringS2S (eng)": 74.23, - "MasakhaNEWSClusteringS2S (fra-Latn)": 74.9, - "MasakhaNEWSClusteringS2S (hau-Latn)": 40.78, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 59.84, - "MasakhaNEWSClusteringS2S (lin-Latn)": 80.96, - "MasakhaNEWSClusteringS2S (lug-Latn)": 46.03, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 36.15, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 92.94, - "MasakhaNEWSClusteringS2S (run-Latn)": 62.3, - "MasakhaNEWSClusteringS2S (sna-Latn)": 52.03, - "MasakhaNEWSClusteringS2S (som-Latn)": 36.18, - "MasakhaNEWSClusteringS2S (swa-Latn)": 35.33, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 43.92, - "MasakhaNEWSClusteringS2S (xho-Latn)": 26.61, - "MasakhaNEWSClusteringS2S (yor-Latn)": 63.18, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 62.27, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 54.13 + "Model": "sentence-t5-xl", + "AlloProfClusteringP2P": 60.37, + "AlloProfClusteringS2S": 40.76, + "ArxivClusteringP2P": 41.62, + "ArxivClusteringS2S": 31.17, + "BiorxivClusteringP2P": 36.43, + "BiorxivClusteringS2S": 26.47, + "HALClusteringS2S": 20.28, + "MLSUMClusteringP2P": 41.61, + "MLSUMClusteringS2S": 33.6, + "MasakhaNEWSClusteringP2P": 62.82, + "MasakhaNEWSClusteringS2S": 31.74, + "MedrxivClusteringP2P": 32.3, + "MedrxivClusteringS2S": 26.93, + "RedditClustering": 57.03, + "RedditClusteringP2P": 62.34, + "StackExchangeClustering": 67.13, + "StackExchangeClusteringP2P": 34.79, + "TwentyNewsgroupsClustering": 49.53 } ] }, "PairClassification": { "max_ap": [ { - "Model": "e5-mistral-7b-instruct", - "CDSC-E (pol-Latn)": 75.86, - "OpusparcusPC (deu-Latn)": 97.63, - "OpusparcusPC (en)": 99.1, - "OpusparcusPC (fin-Latn)": 92.76, - "OpusparcusPC (fra-Latn)": 95.23, - "OpusparcusPC (rus-Cyrl)": 91.44, - "OpusparcusPC (swe-Latn)": 95.54, - "PSC (pol-Latn)": 99.47, - "PawsXPairClassification (deu-Latn)": 58.47, - "PawsXPairClassification (en)": 67.1, - "PawsXPairClassification (spa-Latn)": 60.03, - "PawsXPairClassification (fra-Latn)": 61.85, - "PawsXPairClassification (jpn-Hira)": 51.95, - "PawsXPairClassification (kor-Hang)": 53.11, - "PawsXPairClassification (cmn-Hans)": 59.29, - "SICK-E-PL (pol-Latn)": 79.94, - "SprintDuplicateQuestions": 95.66, - "TERRa (rus-Cyrl)": 60.81, - "TwitterSemEval2015": 81.62, - "TwitterURLCorpus": 87.75 - }, - { - "Model": "e5-mistral-7b-instruct", - "CDSC-E (pol-Latn)": 75.86, - "OpusparcusPC (fr)": 89.71, - "OpusparcusPC (deu-Latn)": 97.63, - "OpusparcusPC (en)": 99.1, - "OpusparcusPC (fin-Latn)": 92.76, - "OpusparcusPC (fra-Latn)": 95.23, - "OpusparcusPC (rus-Cyrl)": 91.44, - "OpusparcusPC (swe-Latn)": 95.54, - "PSC (pol-Latn)": 99.53, - "PawsXPairClassification (fr)": 64.33, - "PawsXPairClassification (deu-Latn)": 58.67, - "PawsXPairClassification (en)": 67.34, - "PawsXPairClassification (spa-Latn)": 60.24, - "PawsXPairClassification (fra-Latn)": 62.03, - "PawsXPairClassification (jpn-Hira)": 52.15, - "PawsXPairClassification (kor-Hang)": 53.14, - "PawsXPairClassification (cmn-Hans)": 59.4, - "SICK-E-PL (pol-Latn)": 79.94, - "SprintDuplicateQuestions": 95.86, - "TERRa (rus-Cyrl)": 60.81, - "TwitterSemEval2015": 81.62, - "TwitterURLCorpus": 87.79 + "Model": "sentence-t5-xl", + "OpusparcusPC": 92.48, + "PawsXPairClassification": 62.52, + "SprintDuplicateQuestions": 91.44, + "TwitterSemEval2015": 80.89, + "TwitterURLCorpus": 85.86 }, { - "Model": "e5-mistral-7b-instruct", - "OpusparcusPC (fr)": 88.5, - "PawsXPairClassification (fr)": 63.65 + "Model": "sentence-t5-xl", + "OpusparcusPC": 92.48, + "PawsXPairClassification": 62.59, + "SprintDuplicateQuestions": 91.44, + "TwitterSemEval2015": 80.89, + "TwitterURLCorpus": 85.86 } ] }, "Reranking": { "map": [ { - "Model": "e5-mistral-7b-instruct", - "AlloprofReranking": 47.36, - "AlloprofReranking (fra-Latn)": 78.32, - "AskUbuntuDupQuestions": 66.98, - "MMarcoReranking (cmn-Hans)": 24.21, - "MindSmallReranking": 32.59, - "RuBQReranking (rus-Cyrl)": 76.32, - "SciDocsRR": 86.34, - "StackOverflowDupQuestions": 54.93, - "SyntecReranking": 77.05, - "SyntecReranking (fra-Latn)": 91.08, - "T2Reranking (cmn-Hans)": 66.9 - }, - { - "Model": "e5-mistral-7b-instruct", - "MIRACLReranking (rus-Cyrl)": 63.61 + "Model": "sentence-t5-xl", + "AlloprofReranking": 63.3, + "AskUbuntuDupQuestions": 62.86, + "MindSmallReranking": 29.77, + "SciDocsRR": 75.16, + "StackOverflowDupQuestions": 51.05, + "SyntecReranking": 83.07 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "e5-mistral-7b-instruct", - "AILACasedocs": 36.66, - "AILAStatutes": 34.53, - "ARCChallenge": 19.0, - "AlloprofRetrieval": 16.46, - "AlloprofRetrieval (fra-Latn)": 54.62, - "AlphaNLI": 26.02, - "AppsRetrieval (eng-Latn_python-Code)": 23.46, - "ArguAna": 61.65, - "ArguAna-PL (pol-Latn)": 49.2, - "BSARDRetrieval": 0.0, - "BSARDRetrieval (fra-Latn)": 25.23, - "BrightRetrieval (sustainable_living)": 18.51, - "BrightRetrieval (economics)": 15.49, - "BrightRetrieval (theoremqa_theorems)": 25.09, - "BrightRetrieval (aops)": 7.1, - "BrightRetrieval (theoremqa_questions)": 23.94, - "BrightRetrieval (stackoverflow)": 9.83, - "BrightRetrieval (psychology)": 15.79, - "BrightRetrieval (pony)": 4.81, - "BrightRetrieval (leetcode)": 28.72, - "BrightRetrieval (biology)": 18.84, - "BrightRetrieval (earth_science)": 25.96, - "BrightRetrieval (robotics)": 16.37, - "CmedqaRetrieval (cmn-Hans)": 34.23, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 36.4, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 76.41, - "CodeSearchNetCCRetrieval (python-Code)": 90.6, - "CodeSearchNetCCRetrieval (javascript-Code)": 86.18, - "CodeSearchNetCCRetrieval (go-Code)": 84.05, - "CodeSearchNetCCRetrieval (ruby-Code)": 85.89, - "CodeSearchNetCCRetrieval (java-Code)": 86.21, - "CodeSearchNetCCRetrieval (php-Code)": 75.95, - "CodeSearchNetRetrieval (python-Code)": 91.75, - "CodeSearchNetRetrieval (javascript-Code)": 80.93, - "CodeSearchNetRetrieval (go-Code)": 93.06, - "CodeSearchNetRetrieval (ruby-Code)": 85.37, - "CodeSearchNetRetrieval (java-Code)": 84.08, - "CodeSearchNetRetrieval (php-Code)": 83.14, - "CodeTransOceanContest (python-Code_c++-Code)": 88.58, - "CodeTransOceanDL": 31.74, - "CosQA (eng-Latn_python-Code)": 33.1, - "CovidRetrieval (cmn-Hans)": 73.11, - "DuRetrieval (cmn-Hans)": 87.04, - "EcomRetrieval (cmn-Hans)": 45.94, - "FiQA-PL (pol-Latn)": 35.34, - "FiQA2018": 56.81, - "GerDaLIRSmall": 37.18, - "GerDaLIRSmall (deu-Latn)": 16.01, - "HellaSwag": 35.37, - "LEMBNarrativeQARetrieval": 37.21, - "LEMBQMSumRetrieval": 28.63, - "LEMBSummScreenFDRetrieval": 75.11, - "LEMBWikimQARetrieval": 58.71, - "LeCaRDv2": 68.56, - "LeCaRDv2 (zho-Hans)": 59.42, - "LegalBenchConsumerContractsQA": 77.98, - "LegalBenchCorporateLobbying": 94.6, - "LegalQuAD": 59.64, - "LegalQuAD (deu-Latn)": 44.41, - "LegalSummarization": 70.86, - "MIRACLRetrieval (rus-Cyrl)": 67.66, - "MMarcoRetrieval (cmn-Hans)": 74.84, - "MedicalRetrieval (cmn-Hans)": 52.83, - "MintakaRetrieval (fr)": 3.57, - "MintakaRetrieval (ara-Arab)": 24.33, - "MintakaRetrieval (deu-Latn)": 46.42, - "MintakaRetrieval (spa-Latn)": 44.42, - "MintakaRetrieval (fra-Latn)": 46.07, - "MintakaRetrieval (hin-Deva)": 25.5, - "MintakaRetrieval (ita-Latn)": 43.36, - "MintakaRetrieval (jpn-Hira)": 33.72, - "MintakaRetrieval (por-Latn)": 47.49, - "NFCorpus": 38.58, - "NFCorpus-PL (pol-Latn)": 30.97, - "PIQA": 39.83, - "Quail": 7.28, - "RARbCode": 79.77, - "RARbMath": 72.0, - "RiaNewsRetrieval (rus-Cyrl)": 78.94, - "RuBQRetrieval (rus-Cyrl)": 75.98, - "SCIDOCS": 16.32, - "SCIDOCS-PL (pol-Latn)": 16.9, - "SIQA": 5.68, - "SciFact": 76.42, - "SciFact-PL (pol-Latn)": 68.11, - "SpartQA": 10.03, - "StackOverflowQA": 91.02, - "SyntecRetrieval": 55.9, - "SyntecRetrieval (fra-Latn)": 92.39, - "SyntheticText2SQL (eng-Latn_sql-Code)": 59.2, - "T2Retrieval (cmn-Hans)": 80.68, - "TRECCOVID": 87.03, - "TRECCOVID-PL (pol-Latn)": 73.58, - "TempReasonL1": 3.6, - "TempReasonL2Fact": 36.19, - "TempReasonL2Pure": 8.93, - "TempReasonL3Fact": 29.98, - "TempReasonL3Pure": 14.15, - "Touche2020": 26.27, - "VideoRetrieval (cmn-Hans)": 45.34, - "WinoGrande": 39.51, - "XPQARetrieval (fr)": 41.29, - "XPQARetrieval (ara-Arab_ara-Arab)": 45.94, - "XPQARetrieval (eng-Latn_ara-Arab)": 30.38, - "XPQARetrieval (ara-Arab_eng-Latn)": 41.53, - "XPQARetrieval (deu-Latn_deu-Latn)": 77.83, - "XPQARetrieval (eng-Latn_deu-Latn)": 41.58, - "XPQARetrieval (deu-Latn_eng-Latn)": 72.43, - "XPQARetrieval (spa-Latn_spa-Latn)": 60.56, - "XPQARetrieval (eng-Latn_spa-Latn)": 29.4, - "XPQARetrieval (spa-Latn_eng-Latn)": 58.02, - "XPQARetrieval (fra-Latn_fra-Latn)": 69.72, - "XPQARetrieval (eng-Latn_fra-Latn)": 38.41, - "XPQARetrieval (fra-Latn_eng-Latn)": 64.51, - "XPQARetrieval (hin-Deva_hin-Deva)": 73.77, - "XPQARetrieval (eng-Latn_hin-Deva)": 19.11, - "XPQARetrieval (hin-Deva_eng-Latn)": 66.29, - "XPQARetrieval (ita-Latn_ita-Latn)": 74.27, - "XPQARetrieval (eng-Latn_ita-Latn)": 26.44, - "XPQARetrieval (ita-Latn_eng-Latn)": 67.28, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 72.92, - "XPQARetrieval (eng-Latn_jpn-Hira)": 39.98, - "XPQARetrieval (jpn-Hira_eng-Latn)": 69.65, - "XPQARetrieval (kor-Hang_kor-Hang)": 39.18, - "XPQARetrieval (eng-Latn_kor-Hang)": 30.34, - "XPQARetrieval (kor-Hang_eng-Latn)": 33.83, - "XPQARetrieval (pol-Latn_pol-Latn)": 46.33, - "XPQARetrieval (eng-Latn_pol-Latn)": 32.92, - "XPQARetrieval (pol-Latn_eng-Latn)": 43.03, - "XPQARetrieval (por-Latn_por-Latn)": 49.25, - "XPQARetrieval (eng-Latn_por-Latn)": 24.44, - "XPQARetrieval (por-Latn_eng-Latn)": 48.68, - "XPQARetrieval (tam-Taml_tam-Taml)": 43.15, - "XPQARetrieval (eng-Latn_tam-Taml)": 3.59, - "XPQARetrieval (tam-Taml_eng-Latn)": 20.34, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 63.0, - "XPQARetrieval (eng-Latn_cmn-Hans)": 34.01, - "XPQARetrieval (cmn-Hans_eng-Latn)": 56.43 - }, - { - "Model": "e5-mistral-7b-instruct", - "LEMBNeedleRetrieval": 31.5, - "LEMBPasskeyRetrieval": 30.75 - } - ], - "recall_at_1": [ - { - "Model": "e5-mistral-7b-instruct", - "BrightRetrieval (pony)": 1.14, - "BrightRetrieval (robotics)": 17.33, - "BrightRetrieval (economics)": 26.21, - "BrightRetrieval (biology)": 29.93, - "BrightRetrieval (earth_science)": 36.28, - "BrightRetrieval (psychology)": 46.73, - "BrightRetrieval (sustainable_living)": 32.21, - "BrightRetrieval (stackoverflow)": 14.53 + "Model": "sentence-t5-xl", + "AlloprofRetrieval": 40.38, + "ArguAna": 39.4, + "BSARDRetrieval": 0.14, + "CQADupstackRetrieval": 40.78, + "ClimateFEVER": 10.61, + "DBPedia": 33.65, + "FEVER": 36.12, + "FiQA2018": 44.71, + "HotpotQA": 37.17, + "MSMARCO": 25.17, + "MintakaRetrieval": 31.54, + "NFCorpus": 33.18, + "NQ": 46.29, + "QuoraRetrieval": 85.85, + "SCIDOCS": 15.97, + "SciFact": 50.91, + "SyntecRetrieval": 74.24, + "TRECCOVID": 54.77, + "Touche2020": 22.51, + "XPQARetrieval": 52.14 } ] }, "STS": { "cosine_spearman": [ { - "Model": "e5-mistral-7b-instruct", - "AFQMC (cmn-Hans)": 38.99, - "ATEC (cmn-Hans)": 42.84, - "BIOSSES": 85.5, - "BQ (cmn-Hans)": 50.63, - "CDSC-R (pol-Latn)": 92.19, - "LCQMC (cmn-Hans)": 75.48, - "PAWSX (cmn-Hans)": 16.81, - "RUParaPhraserSTS (rus-Cyrl)": 76.17, - "RuSTSBenchmarkSTS (rus-Cyrl)": 84.13, - "SICK-R": 82.64, - "SICK-R-PL (pol-Latn)": 76.67, - "SICKFr (fra-Latn)": 80.99, - "STS12": 79.65, - "STS13": 88.43, - "STS14": 84.54, - "STS15": 90.42, - "STS16": 87.69, - "STS17 (ara-Arab)": 81.87, - "STS17 (eng-Latn_ara-Arab)": 77.95, - "STS17 (nld-Latn_eng-Latn)": 88.25, - "STS17 (eng-Latn_tur-Latn)": 72.59, - "STS17 (eng-Latn_deu-Latn)": 87.3, - "STS17 (kor-Hang)": 83.69, - "STS17 (spa-Latn)": 87.46, - "STS17 (en-en)": 91.76, - "STS17 (spa-Latn_eng-Latn)": 88.24, - "STS17 (fra-Latn_eng-Latn)": 88.08, - "STS17 (ita-Latn_eng-Latn)": 89.69, - "STS22 (rus-Cyrl)": 60.83, - "STSB (cmn-Hans)": 81.81, - "STSBenchmark": 88.6, - "STSBenchmarkMultilingualSTS (pol-Latn)": 83.62, - "STSBenchmarkMultilingualSTS (spa-Latn)": 86.13, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 84.25, - "STSBenchmarkMultilingualSTS (nld-Latn)": 83.69, - "STSBenchmarkMultilingualSTS (deu-Latn)": 85.37, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 82.61, - "STSBenchmarkMultilingualSTS (en)": 88.6, - "STSBenchmarkMultilingualSTS (fra-Latn)": 85.49, - "STSBenchmarkMultilingualSTS (ita-Latn)": 84.43, - "STSBenchmarkMultilingualSTS (por-Latn)": 84.57 - }, - { - "Model": "e5-mistral-7b-instruct", - "AFQMC (cmn-Hans)": 38.99, - "ATEC (cmn-Hans)": 42.84, - "BIOSSES": 85.5, - "BQ (cmn-Hans)": 50.63, - "CDSC-R (pol-Latn)": 92.19, - "LCQMC (cmn-Hans)": 75.48, - "PAWSX (cmn-Hans)": 16.81, - "RUParaPhraserSTS (rus-Cyrl)": 76.17, - "RuSTSBenchmarkSTS (rus-Cyrl)": 84.13, - "SICK-R": 82.64, - "SICK-R-PL (pol-Latn)": 76.67, - "SICKFr (fra-Latn)": 80.99, - "STS12": 79.65, - "STS13": 88.43, - "STS14": 84.54, - "STS15": 90.42, - "STS16": 87.69, - "STS17 (ara-Arab)": 81.87, - "STS17 (eng-Latn_ara-Arab)": 77.95, - "STS17 (nld-Latn_eng-Latn)": 88.25, - "STS17 (eng-Latn_tur-Latn)": 72.59, - "STS17 (eng-Latn_deu-Latn)": 87.3, - "STS17 (kor-Hang)": 83.69, - "STS17 (spa-Latn)": 87.46, - "STS17 (en-en)": 91.76, - "STS17 (spa-Latn_eng-Latn)": 88.24, - "STS17 (fra-Latn_eng-Latn)": 88.08, - "STS17 (ita-Latn_eng-Latn)": 89.69, - "STS22 (rus-Cyrl)": 60.83, - "STSB (cmn-Hans)": 81.81, - "STSBenchmark": 88.6, - "STSBenchmarkMultilingualSTS (pol-Latn)": 83.62, - "STSBenchmarkMultilingualSTS (spa-Latn)": 86.13, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 84.25, - "STSBenchmarkMultilingualSTS (nld-Latn)": 83.69, - "STSBenchmarkMultilingualSTS (deu-Latn)": 85.37, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 82.61, - "STSBenchmarkMultilingualSTS (en)": 88.6, - "STSBenchmarkMultilingualSTS (fra-Latn)": 85.49, - "STSBenchmarkMultilingualSTS (ita-Latn)": 84.43, - "STSBenchmarkMultilingualSTS (por-Latn)": 84.57 - }, - { - "Model": "e5-mistral-7b-instruct", - "SICKFr": 64.39, - "STS22 (fr)": 69.82, - "STSBenchmarkMultilingualSTS (fr)": 61.87 + "Model": "sentence-t5-xl", + "BIOSSES": 73.12, + "SICK-R": 79.98, + "SICKFr": 75.08, + "STS12": 79.02, + "STS13": 88.8, + "STS14": 84.33, + "STS15": 88.89, + "STS16": 85.31, + "STS17": 66.12, + "STS22": 29.0, + "STSBenchmark": 83.93, + "STSBenchmarkMultilingualSTS": 79.42 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "e5-mistral-7b-instruct", - "SummEval": 31.53, - "SummEvalFr (fra-Latn)": 31.05 - }, - { - "Model": "e5-mistral-7b-instruct", - "SummEval": 31.53, - "SummEvalFr (fra-Latn)": 31.05 - }, - { - "Model": "e5-mistral-7b-instruct", - "SummEvalFr": 32.22 + "Model": "sentence-t5-xl", + "SummEval": 29.91, + "SummEvalFr": 31.59 } ] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "e5-mistral-7b-instruct", - "CEDRClassification (rus-Cyrl)": 51.94, - "SensitiveTopicsClassification (rus-Cyrl)": 33.92 - } - ] + "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "e5-mistral-7b-instruct", - "Core17InstructionRetrieval": 3.76, - "News21InstructionRetrieval": 0.74, - "Robust04InstructionRetrieval": -6.35 - } - ] + "p-MRR": [] } }, - "LLM2Vec-Sheared-Llama-unsupervised": { + "sentence-t5-xxl": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "AmazonCounterfactualClassification (en)": 72.93, - "AmazonPolarityClassification": 74.28, - "AmazonReviewsClassification (en)": 36.14, - "Banking77Classification": 79.0, - "EmotionClassification": 42.85, - "ImdbClassification": 71.92, - "MTOPDomainClassification (en)": 91.24, - "MTOPIntentClassification (en)": 74.08, - "MassiveIntentClassification (en)": 69.99, - "MassiveScenarioClassification (en)": 75.15, - "ToxicConversationsClassification": 68.4, - "TweetSentimentExtractionClassification": 56.08 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "ArxivClusteringP2P": 42.92, - "ArxivClusteringS2S": 35.2, - "BiorxivClusteringP2P": 35.02, - "BiorxivClusteringS2S": 27.21, - "MedrxivClusteringP2P": 30.15, - "MedrxivClusteringS2S": 26.96, - "RedditClustering": 38.67, - "RedditClusteringP2P": 53.42, - "StackExchangeClustering": 59.35, - "StackExchangeClusteringP2P": 31.47, - "TwentyNewsgroupsClustering": 31.54 + "Model": "sentence-t5-xxl", + "AmazonCounterfactualClassification": 77.07, + "AmazonPolarityClassification": 92.79, + "AmazonReviewsClassification": 46.09, + "Banking77Classification": 82.31, + "EmotionClassification": 48.57, + "ImdbClassification": 90.23, + "MTOPDomainClassification": 86.2, + "MTOPIntentClassification": 58.33, + "MasakhaNEWSClassification": 79.1, + "MassiveIntentClassification": 65.91, + "MassiveScenarioClassification": 68.53, + "ToxicConversationsClassification": 70.04, + "TweetSentimentExtractionClassification": 62.01 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "sentence-t5-xxl", + "AlloProfClusteringP2P": 60.98, + "AlloProfClusteringS2S": 43.5, + "ArxivClusteringP2P": 42.89, + "ArxivClusteringS2S": 33.47, + "BiorxivClusteringP2P": 36.53, + "BiorxivClusteringS2S": 28.66, + "BlurbsClusteringP2P": 39.91, + "BlurbsClusteringS2S": 15.94, + "HALClusteringS2S": 21.4, + "MLSUMClusteringP2P": 42.24, + "MLSUMClusteringS2S": 35.25, + "MasakhaNEWSClusteringP2P": 61.15, + "MasakhaNEWSClusteringS2S": 38.24, + "MedrxivClusteringP2P": 32.09, + "MedrxivClusteringS2S": 26.82, + "RedditClustering": 58.99, + "RedditClusteringP2P": 64.46, + "StackExchangeClustering": 70.78, + "StackExchangeClusteringP2P": 35.25, + "TenKGnadClusteringP2P": 43.43, + "TenKGnadClusteringS2S": 19.69, + "TwentyNewsgroupsClustering": 50.93 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "SprintDuplicateQuestions": 77.36, - "TwitterSemEval2015": 61.54, - "TwitterURLCorpus": 77.73 + "Model": "sentence-t5-xxl", + "OpusparcusPC": 93.94, + "PawsXPairClassification": 63.98, + "SprintDuplicateQuestions": 88.89, + "TwitterSemEval2015": 80.28, + "TwitterURLCorpus": 86.01 }, { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "SprintDuplicateQuestions": 77.36, - "TwitterSemEval2015": 61.54, - "TwitterURLCorpus": 77.73 + "Model": "sentence-t5-xxl", + "OpusparcusPC": 93.94, + "PawsXPairClassification": 64.01, + "SprintDuplicateQuestions": 88.91, + "TwitterSemEval2015": 80.28, + "TwitterURLCorpus": 86.01 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "AskUbuntuDupQuestions": 52.7, - "MindSmallReranking": 29.52, - "SciDocsRR": 67.76, - "StackOverflowDupQuestions": 40.82 + "Model": "sentence-t5-xxl", + "AlloprofReranking": 68.36, + "AskUbuntuDupQuestions": 66.16, + "MindSmallReranking": 30.6, + "SciDocsRR": 76.09, + "StackOverflowDupQuestions": 52.85, + "SyntecReranking": 85.15 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "ArguAna": 43.64, - "CQADupstackRetrieval": 18.5, - "ClimateFEVER": 18.95, - "DBPedia": 13.21, - "FEVER": 16.96, - "FiQA2018": 16.99, - "HotpotQA": 22.64, - "MSMARCO": 7.03, - "NFCorpus": 15.73, - "NQ": 17.96, - "QuoraRetrieval": 78.23, - "SCIDOCS": 5.53, - "SciFact": 38.31, - "TRECCOVID": 56.04, - "Touche2020": 19.17 + "Model": "sentence-t5-xxl", + "AlloprofRetrieval": 45.75, + "ArguAna": 39.85, + "BSARDRetrieval": 3.33, + "CQADupstackRetrieval": 44.65, + "ClimateFEVER": 14.63, + "DBPedia": 39.19, + "FEVER": 51.2, + "FiQA2018": 46.68, + "HotpotQA": 42.14, + "MSMARCO": 27.67, + "MintakaRetrieval": 34.93, + "NFCorpus": 35.08, + "NQ": 52.87, + "QuoraRetrieval": 85.96, + "SCIDOCS": 17.17, + "SciFact": 55.38, + "SyntecRetrieval": 78.97, + "TRECCOVID": 59.48, + "Touche2020": 21.65, + "XPQARetrieval": 56.2 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "BIOSSES": 75.12, - "SICK-R": 69.34, - "STS12": 60.09, - "STS13": 72.52, - "STS14": 66.7, - "STS15": 77.69, - "STS16": 75.94, - "STS17 (en-en)": 81.67, - "STS22 (en)": 63.7, - "STSBenchmark": 73.36 + "Model": "sentence-t5-xxl", + "BIOSSES": 80.43, + "SICK-R": 80.47, + "SICKFr": 77.07, + "STS12": 78.85, + "STS13": 88.94, + "STS14": 84.86, + "STS15": 89.32, + "STS16": 84.67, + "STS17": 89.46, + "STS22": 76.8, + "STSBenchmark": 84.01, + "STSBenchmarkMultilingualSTS": 81.24 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "LLM2Vec-Sheared-Llama-unsupervised", - "SummEval": 31.23 + "Model": "sentence-t5-xxl", + "SummEval": 30.08, + "SummEvalFr": 30.39 } ] }, @@ -19106,18 +18858,49 @@ "p-MRR": [] } }, - "bm25s": { + "silver-retriever-base-v1": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "silver-retriever-base-v1", + "AllegroReviews": 33.35, + "CBD": 68.51, + "MassiveIntentClassification": 66.63, + "MassiveScenarioClassification": 69.97, + "PAC": 66.26, + "PolEmo2.0-IN": 63.52, + "PolEmo2.0-OUT": 44.7 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "silver-retriever-base-v1", + "8TagsClustering": 31.49 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "silver-retriever-base-v1", + "CDSC-E": 67.35, + "PPC": 85.33, + "PSC": 98.46, + "SICK-E-PL": 58.19 + }, + { + "Model": "silver-retriever-base-v1", + "CDSC-E": 67.56, + "PPC": 85.98, + "PSC": 98.6, + "SICK-E-PL": 58.54 + } + ] }, "Reranking": { "map": [] @@ -19125,27 +18908,30 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "bm25s", - "ArguAna": 49.28, - "CQADupstackRetrieval": 31.86, - "ClimateFEVER": 13.62, - "DBPedia": 29.91, - "FEVER": 48.09, - "FiQA2018": 25.14, - "HotpotQA": 56.91, - "MSMARCO": 21.89, - "NFCorpus": 32.08, - "NQ": 28.5, - "QuoraRetrieval": 80.42, - "SCIDOCS": 15.78, - "SciFact": 68.7, - "TRECCOVID": 62.31, - "Touche2020": 33.05 + "Model": "silver-retriever-base-v1", + "ArguAna-PL": 44.12, + "DBPedia-PL": 26.32, + "FiQA-PL": 24.95, + "HotpotQA-PL": 45.13, + "MSMARCO-PL": 25.47, + "NFCorpus-PL": 28.55, + "NQ-PL": 37.9, + "Quora-PL": 77.98, + "SCIDOCS-PL": 10.9, + "SciFact-PL": 54.44, + "TRECCOVID-PL": 46.98 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "silver-retriever-base-v1", + "CDSC-R": 89.09, + "SICK-R-PL": 67.26, + "STS22": 38.69 + } + ] }, "Summarization": { "cosine_spearman": [] @@ -19157,47 +18943,47 @@ "p-MRR": [] } }, - "st-polish-paraphrase-from-mpnet": { + "st-polish-paraphrase-from-distilroberta": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "st-polish-paraphrase-from-mpnet", - "AllegroReviews": 34.55, - "CBD": 67.48, - "MassiveIntentClassification (pl)": 65.93, - "MassiveScenarioClassification (pl)": 71.85, - "PAC": 63.25, - "PolEmo2.0-IN": 68.37, - "PolEmo2.0-OUT": 30.99 + "Model": "st-polish-paraphrase-from-distilroberta", + "AllegroReviews": 34.5, + "CBD": 70.27, + "MassiveIntentClassification": 64.81, + "MassiveScenarioClassification": 70.01, + "PAC": 64.6, + "PolEmo2.0-IN": 67.06, + "PolEmo2.0-OUT": 38.58 } ] }, "Clustering": { "v_measure": [ { - "Model": "st-polish-paraphrase-from-mpnet", - "8TagsClustering": 33.15 + "Model": "st-polish-paraphrase-from-distilroberta", + "8TagsClustering": 31.68 } ] }, "PairClassification": { "max_ap": [ { - "Model": "st-polish-paraphrase-from-mpnet", - "CDSC-E": 75.06, - "PPC": 93.49, - "PSC": 99.05, - "SICK-E-PL": 80.56 + "Model": "st-polish-paraphrase-from-distilroberta", + "CDSC-E": 75.99, + "PPC": 93.29, + "PSC": 99.1, + "SICK-E-PL": 79.63 }, { - "Model": "st-polish-paraphrase-from-mpnet", - "CDSC-E": 75.06, - "PPC": 93.67, - "PSC": 99.05, - "SICK-E-PL": 80.56 + "Model": "st-polish-paraphrase-from-distilroberta", + "CDSC-E": 75.99, + "PPC": 93.31, + "PSC": 99.1, + "SICK-E-PL": 79.63 } ] }, @@ -19207,28 +18993,28 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "st-polish-paraphrase-from-mpnet", - "ArguAna-PL": 51.87, - "DBPedia-PL": 24.59, - "FiQA-PL": 22.27, - "HotpotQA-PL": 32.11, - "MSMARCO-PL": 17.91, - "NFCorpus-PL": 24.05, - "NQ-PL": 23.54, - "Quora-PL": 81.49, - "SCIDOCS-PL": 13.23, - "SciFact-PL": 52.51, - "TRECCOVID-PL": 35.23 + "Model": "st-polish-paraphrase-from-distilroberta", + "ArguAna-PL": 49.42, + "DBPedia-PL": 19.82, + "FiQA-PL": 19.58, + "HotpotQA-PL": 23.47, + "MSMARCO-PL": 16.51, + "NFCorpus-PL": 22.49, + "NQ-PL": 19.83, + "Quora-PL": 81.17, + "SCIDOCS-PL": 12.15, + "SciFact-PL": 49.49, + "TRECCOVID-PL": 38.97 } ] }, "STS": { "cosine_spearman": [ { - "Model": "st-polish-paraphrase-from-mpnet", - "CDSC-R": 88.55, - "SICK-R-PL": 76.18, - "STS22 (pl)": 37.34 + "Model": "st-polish-paraphrase-from-distilroberta", + "CDSC-R": 89.62, + "SICK-R-PL": 76.37, + "STS22": 40.36 } ] }, @@ -19242,89 +19028,78 @@ "p-MRR": [] } }, - "text2vec-large-chinese": { + "st-polish-paraphrase-from-mpnet": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "text2vec-large-chinese", - "AmazonReviewsClassification (zh)": 33.77, - "IFlyTek": 41.54, - "JDReview": 81.56, - "MassiveIntentClassification (zh-CN)": 63.23, - "MassiveScenarioClassification (zh-CN)": 68.45, - "MultilingualSentiment": 58.97, - "OnlineShopping": 83.51, - "TNews": 38.92, - "Waimai": 76.01 + "Model": "st-polish-paraphrase-from-mpnet", + "AllegroReviews": 34.55, + "CBD": 67.48, + "MassiveIntentClassification": 65.93, + "MassiveScenarioClassification": 71.85, + "PAC": 63.25, + "PolEmo2.0-IN": 68.37, + "PolEmo2.0-OUT": 30.99 } ] }, "Clustering": { "v_measure": [ { - "Model": "text2vec-large-chinese", - "CLSClusteringP2P": 30.13, - "CLSClusteringS2S": 28.77, - "ThuNewsClusteringP2P": 35.05, - "ThuNewsClusteringS2S": 26.14 + "Model": "st-polish-paraphrase-from-mpnet", + "8TagsClustering": 33.15 } ] }, "PairClassification": { "max_ap": [ { - "Model": "text2vec-large-chinese", - "Cmnli": 77.67, - "Ocnli": 64.04 + "Model": "st-polish-paraphrase-from-mpnet", + "CDSC-E": 75.06, + "PPC": 93.49, + "PSC": 99.05, + "SICK-E-PL": 80.56 }, { - "Model": "text2vec-large-chinese", - "Cmnli": 77.85, - "Ocnli": 64.47 + "Model": "st-polish-paraphrase-from-mpnet", + "CDSC-E": 75.06, + "PPC": 93.67, + "PSC": 99.05, + "SICK-E-PL": 80.56 } ] }, "Reranking": { - "map": [ - { - "Model": "text2vec-large-chinese", - "CMedQAv1": 58.92, - "CMedQAv2": 60.41, - "MMarcoReranking": 12.48, - "T2Reranking": 64.82 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text2vec-large-chinese", - "CmedqaRetrieval": 15.53, - "CovidRetrieval": 60.48, - "DuRetrieval": 51.87, - "EcomRetrieval": 37.58, - "MMarcoRetrieval": 45.96, - "MedicalRetrieval": 30.93, - "T2Retrieval": 50.52, - "VideoRetrieval": 42.65 + "Model": "st-polish-paraphrase-from-mpnet", + "ArguAna-PL": 51.87, + "DBPedia-PL": 24.59, + "FiQA-PL": 22.27, + "HotpotQA-PL": 32.11, + "MSMARCO-PL": 17.91, + "NFCorpus-PL": 24.05, + "NQ-PL": 23.54, + "Quora-PL": 81.49, + "SCIDOCS-PL": 13.23, + "SciFact-PL": 52.51, + "TRECCOVID-PL": 35.23 } ] }, "STS": { "cosine_spearman": [ { - "Model": "text2vec-large-chinese", - "AFQMC": 24.51, - "ATEC": 32.45, - "BQ": 44.22, - "LCQMC": 69.16, - "PAWSX": 14.55, - "QBQTC": 29.51, - "STS22 (zh)": 65.94, - "STSB": 79.45 + "Model": "st-polish-paraphrase-from-mpnet", + "CDSC-R": 88.55, + "SICK-R-PL": 76.18, + "STS22": 37.34 } ] }, @@ -19338,118 +19113,118 @@ "p-MRR": [] } }, - "LLM2Vec-Mistral-unsupervised": { + "sup-simcse-bert-base-uncased": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "AmazonCounterfactualClassification (en)": 76.94, - "AmazonPolarityClassification": 85.29, - "AmazonReviewsClassification (en)": 47.09, - "Banking77Classification": 86.16, - "EmotionClassification": 48.88, - "ImdbClassification": 77.95, - "MTOPDomainClassification (en)": 95.48, - "MTOPIntentClassification (en)": 82.84, - "MassiveIntentClassification (en)": 76.65, - "MassiveScenarioClassification (en)": 79.99, - "ToxicConversationsClassification": 70.71, - "TweetSentimentExtractionClassification": 60.9 + "Model": "sup-simcse-bert-base-uncased", + "AmazonCounterfactualClassification": 75.75, + "AmazonPolarityClassification": 82.47, + "AmazonReviewsClassification": 39.6, + "Banking77Classification": 75.76, + "EmotionClassification": 44.81, + "ImdbClassification": 73.53, + "MTOPDomainClassification": 84.29, + "MTOPIntentClassification": 63.14, + "MassiveIntentClassification": 65.95, + "MassiveScenarioClassification": 70.78, + "ToxicConversationsClassification": 72.04, + "TweetSentimentExtractionClassification": 59.73 } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "ArxivClusteringP2P": 47.56, - "ArxivClusteringS2S": 39.92, - "BiorxivClusteringP2P": 36.14, - "BiorxivClusteringS2S": 30.26, - "MedrxivClusteringP2P": 30.11, - "MedrxivClusteringS2S": 26.93, - "RedditClustering": 41.83, - "RedditClusteringP2P": 62.08, - "StackExchangeClustering": 67.34, - "StackExchangeClusteringP2P": 34.5, - "TwentyNewsgroupsClustering": 30.26 + "Model": "sup-simcse-bert-base-uncased", + "ArxivClusteringP2P": 35.18, + "ArxivClusteringS2S": 27.54, + "BiorxivClusteringP2P": 30.15, + "BiorxivClusteringS2S": 24.67, + "MedrxivClusteringP2P": 26.25, + "MedrxivClusteringS2S": 24.12, + "RedditClustering": 40.23, + "RedditClusteringP2P": 47.74, + "StackExchangeClustering": 47.55, + "StackExchangeClusteringP2P": 29.45, + "TwentyNewsgroupsClustering": 34.86 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "SprintDuplicateQuestions": 91.3, - "TwitterSemEval2015": 68.76, - "TwitterURLCorpus": 82.76 + "Model": "sup-simcse-bert-base-uncased", + "SprintDuplicateQuestions": 69.39, + "TwitterSemEval2015": 67.75, + "TwitterURLCorpus": 83.89 }, { - "Model": "LLM2Vec-Mistral-unsupervised", - "SprintDuplicateQuestions": 91.3, - "TwitterSemEval2015": 68.76, - "TwitterURLCorpus": 82.76 + "Model": "sup-simcse-bert-base-uncased", + "SprintDuplicateQuestions": 73.04, + "TwitterSemEval2015": 67.75, + "TwitterURLCorpus": 83.89 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "AskUbuntuDupQuestions": 58.6, - "MindSmallReranking": 29.73, - "SciDocsRR": 77.81, - "StackOverflowDupQuestions": 49.8 + "Model": "sup-simcse-bert-base-uncased", + "AskUbuntuDupQuestions": 51.8, + "MindSmallReranking": 29.3, + "SciDocsRR": 70.14, + "StackOverflowDupQuestions": 38.9 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "ArguAna": 51.0, - "CQADupstackRetrieval": 33.37, - "ClimateFEVER": 22.97, - "DBPedia": 25.48, - "FEVER": 45.11, - "FiQA2018": 27.24, - "HotpotQA": 54.54, - "MSMARCO": 19.13, - "NFCorpus": 27.16, - "NQ": 34.16, - "QuoraRetrieval": 84.4, - "SCIDOCS": 15.35, - "SciFact": 68.68, - "TRECCOVID": 55.67, - "Touche2020": 6.54 + "Model": "sup-simcse-bert-base-uncased", + "ArguAna": 38.33, + "CQADupstackRetrieval": 14.5, + "ClimateFEVER": 11.98, + "DBPedia": 19.73, + "FEVER": 20.41, + "FiQA2018": 10.41, + "HotpotQA": 22.9, + "MSMARCO": 11.0, + "NFCorpus": 12.42, + "NQ": 16.08, + "QuoraRetrieval": 79.62, + "SCIDOCS": 7.53, + "SciFact": 29.59, + "TRECCOVID": 22.93, + "Touche2020": 9.9 } ] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "BIOSSES": 83.29, - "SICK-R": 75.55, - "STS12": 67.65, - "STS13": 83.9, - "STS14": 76.97, - "STS15": 83.8, - "STS16": 81.91, - "STS17 (en-en)": 85.58, - "STS22 (en)": 65.93, - "STSBenchmark": 80.42 + "Model": "sup-simcse-bert-base-uncased", + "BIOSSES": 68.38, + "SICK-R": 80.77, + "STS12": 75.3, + "STS13": 84.67, + "STS14": 80.19, + "STS15": 85.4, + "STS16": 80.82, + "STS17": 89.44, + "STS22": 61.96, + "STSBenchmark": 84.25 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "LLM2Vec-Mistral-unsupervised", - "SummEval": 30.19 + "Model": "sup-simcse-bert-base-uncased", + "SummEval": 31.17 } ] }, @@ -19460,1074 +19235,367 @@ "p-MRR": [] } }, - "flaubert_base_uncased": { + "tart-dual-contriever-msmarco": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "flaubert_base_uncased", - "AmazonReviewsClassification (fr)": 23.52, - "MTOPDomainClassification (fr)": 27.74, - "MTOPIntentClassification (fr)": 8.61, - "MasakhaNEWSClassification (fra)": 62.61, - "MassiveIntentClassification (fr)": 6.24, - "MassiveScenarioClassification (fr)": 10.98 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "flaubert_base_uncased", - "AlloProfClusteringP2P": 43.2, - "AlloProfClusteringS2S": 12.94, - "HALClusteringS2S": 1.8, - "MLSUMClusteringP2P": 33.22, - "MLSUMClusteringS2S": 14.9, - "MasakhaNEWSClusteringP2P (fra)": 28.49, - "MasakhaNEWSClusteringS2S (fra)": 22.58 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "flaubert_base_uncased", - "OpusparcusPC (fr)": 82.0, - "PawsXPairClassification (fr)": 52.78 - }, - { - "Model": "flaubert_base_uncased", - "OpusparcusPC (fr)": 82.0, - "PawsXPairClassification (fr)": 52.89 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "flaubert_base_uncased", - "AlloprofReranking": 34.55, - "SyntecReranking": 57.18 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "flaubert_base_uncased", - "AlloprofRetrieval": 1.72, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 0.51, - "SyntecRetrieval": 22.33, - "XPQARetrieval (fr)": 9.09 - } - ] + "map": [] }, - "STS": { - "cosine_spearman": [ - { - "Model": "flaubert_base_uncased", - "SICKFr": 41.9, - "STS22 (fr)": 55.15, - "STSBenchmarkMultilingualSTS (fr)": 33.41 - } - ] + "Retrieval": { + "ndcg_at_10": [] + }, + "STS": { + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "flaubert_base_uncased", - "SummEvalFr": 29.43 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "tart-dual-contriever-msmarco", + "Core17InstructionRetrieval": -3.04, + "News21InstructionRetrieval": -2.98, + "Robust04InstructionRetrieval": -8.98 + } + ] } }, - "flaubert_base_cased": { + "tart-full-flan-t5-xl": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "flaubert_base_cased", - "AmazonReviewsClassification (fr)": 24.9, - "MTOPDomainClassification (fr)": 25.55, - "MTOPIntentClassification (fr)": 9.49, - "MasakhaNEWSClassification (fra)": 71.14, - "MassiveIntentClassification (fr)": 6.98, - "MassiveScenarioClassification (fr)": 11.41 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "flaubert_base_cased", - "AlloProfClusteringP2P": 52.86, - "AlloProfClusteringS2S": 14.46, - "HALClusteringS2S": 3.85, - "MLSUMClusteringP2P": 39.06, - "MLSUMClusteringS2S": 17.13, - "MasakhaNEWSClusteringP2P (fra)": 41.61, - "MasakhaNEWSClusteringS2S (fra)": 21.26 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "flaubert_base_cased", - "OpusparcusPC (fr)": 82.15, - "PawsXPairClassification (fr)": 51.89 - }, - { - "Model": "flaubert_base_cased", - "OpusparcusPC (fr)": 82.15, - "PawsXPairClassification (fr)": 52.19 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "flaubert_base_cased", - "AlloprofReranking": 34.81, - "SyntecReranking": 55.88 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "flaubert_base_cased", - "AlloprofRetrieval": 1.63, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 0.58, - "SyntecRetrieval": 20.56, - "XPQARetrieval (fr)": 6.59 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "flaubert_base_cased", - "SICKFr": 53.86, - "STS22 (fr)": 65.37, - "STSBenchmarkMultilingualSTS (fr)": 37.14 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "flaubert_base_cased", - "SummEvalFr": 31.26 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [] - } - }, - "all-MiniLM-L12-v2": { - "BitextMining": { - "f1": [ + "p-MRR": [ { - "Model": "all-MiniLM-L12-v2", - "BornholmBitextMining (dan-Latn)": 35.25, - "Tatoeba (tat-Cyrl_eng-Latn)": 0.75, - "Tatoeba (yid-Hebr_eng-Latn)": 0.19, - "Tatoeba (tzl-Latn_eng-Latn)": 6.87, - "Tatoeba (ben-Beng_eng-Latn)": 0.02, - "Tatoeba (sqi-Latn_eng-Latn)": 5.86, - "Tatoeba (war-Latn_eng-Latn)": 6.18, - "Tatoeba (nld-Latn_eng-Latn)": 12.56, - "Tatoeba (ast-Latn_eng-Latn)": 9.99, - "Tatoeba (awa-Deva_eng-Latn)": 0.44, - "Tatoeba (jpn-Jpan_eng-Latn)": 2.18, - "Tatoeba (kat-Geor_eng-Latn)": 0.45, - "Tatoeba (pes-Arab_eng-Latn)": 0.3, - "Tatoeba (fra-Latn_eng-Latn)": 17.53, - "Tatoeba (nds-Latn_eng-Latn)": 11.35, - "Tatoeba (gle-Latn_eng-Latn)": 3.08, - "Tatoeba (arz-Arab_eng-Latn)": 0.0, - "Tatoeba (srp-Cyrl_eng-Latn)": 2.22, - "Tatoeba (mhr-Cyrl_eng-Latn)": 0.0, - "Tatoeba (bos-Latn_eng-Latn)": 7.05, - "Tatoeba (heb-Hebr_eng-Latn)": 0.3, - "Tatoeba (orv-Cyrl_eng-Latn)": 0.15, - "Tatoeba (kaz-Cyrl_eng-Latn)": 0.82, - "Tatoeba (eus-Latn_eng-Latn)": 6.58, - "Tatoeba (hsb-Latn_eng-Latn)": 2.89, - "Tatoeba (max-Deva_eng-Latn)": 8.4, - "Tatoeba (kab-Latn_eng-Latn)": 0.91, - "Tatoeba (hrv-Latn_eng-Latn)": 5.68, - "Tatoeba (deu-Latn_eng-Latn)": 13.89, - "Tatoeba (kor-Hang_eng-Latn)": 0.9, - "Tatoeba (slk-Latn_eng-Latn)": 4.2, - "Tatoeba (tur-Latn_eng-Latn)": 3.69, - "Tatoeba (ron-Latn_eng-Latn)": 8.77, - "Tatoeba (nno-Latn_eng-Latn)": 7.45, - "Tatoeba (ido-Latn_eng-Latn)": 11.08, - "Tatoeba (est-Latn_eng-Latn)": 2.6, - "Tatoeba (ceb-Latn_eng-Latn)": 3.95, - "Tatoeba (bre-Latn_eng-Latn)": 3.68, - "Tatoeba (lfn-Latn_eng-Latn)": 7.52, - "Tatoeba (ina-Latn_eng-Latn)": 25.36, - "Tatoeba (nov-Latn_eng-Latn)": 19.45, - "Tatoeba (slv-Latn_eng-Latn)": 4.52, - "Tatoeba (fry-Latn_eng-Latn)": 14.53, - "Tatoeba (cbk-Latn_eng-Latn)": 9.76, - "Tatoeba (lvs-Latn_eng-Latn)": 3.45, - "Tatoeba (yue-Hant_eng-Latn)": 1.89, - "Tatoeba (ces-Latn_eng-Latn)": 4.2, - "Tatoeba (afr-Latn_eng-Latn)": 7.59, - "Tatoeba (rus-Cyrl_eng-Latn)": 0.07, - "Tatoeba (amh-Ethi_eng-Latn)": 0.01, - "Tatoeba (ang-Latn_eng-Latn)": 14.63, - "Tatoeba (cat-Latn_eng-Latn)": 11.79, - "Tatoeba (khm-Khmr_eng-Latn)": 0.42, - "Tatoeba (pam-Latn_eng-Latn)": 4.73, - "Tatoeba (pms-Latn_eng-Latn)": 8.94, - "Tatoeba (gsw-Latn_eng-Latn)": 9.9, - "Tatoeba (swg-Latn_eng-Latn)": 11.9, - "Tatoeba (tel-Telu_eng-Latn)": 0.67, - "Tatoeba (nob-Latn_eng-Latn)": 8.02, - "Tatoeba (uig-Arab_eng-Latn)": 0.4, - "Tatoeba (bel-Cyrl_eng-Latn)": 0.85, - "Tatoeba (lit-Latn_eng-Latn)": 1.56, - "Tatoeba (isl-Latn_eng-Latn)": 3.44, - "Tatoeba (swh-Latn_eng-Latn)": 5.82, - "Tatoeba (tha-Thai_eng-Latn)": 0.67, - "Tatoeba (mon-Cyrl_eng-Latn)": 0.06, - "Tatoeba (hin-Deva_eng-Latn)": 0.0, - "Tatoeba (swe-Latn_eng-Latn)": 7.31, - "Tatoeba (epo-Latn_eng-Latn)": 8.5, - "Tatoeba (ind-Latn_eng-Latn)": 5.3, - "Tatoeba (tgl-Latn_eng-Latn)": 3.34, - "Tatoeba (arq-Arab_eng-Latn)": 0.28, - "Tatoeba (aze-Latn_eng-Latn)": 1.47, - "Tatoeba (por-Latn_eng-Latn)": 11.36, - "Tatoeba (hun-Latn_eng-Latn)": 3.93, - "Tatoeba (kur-Latn_eng-Latn)": 7.3, - "Tatoeba (urd-Arab_eng-Latn)": 0.0, - "Tatoeba (csb-Latn_eng-Latn)": 5.21, - "Tatoeba (glg-Latn_eng-Latn)": 12.6, - "Tatoeba (wuu-Hans_eng-Latn)": 1.89, - "Tatoeba (uzb-Latn_eng-Latn)": 2.2, - "Tatoeba (fao-Latn_eng-Latn)": 5.92, - "Tatoeba (mal-Mlym_eng-Latn)": 0.24, - "Tatoeba (dsb-Latn_eng-Latn)": 3.06, - "Tatoeba (jav-Latn_eng-Latn)": 3.5, - "Tatoeba (xho-Latn_eng-Latn)": 3.66, - "Tatoeba (dan-Latn_eng-Latn)": 10.21, - "Tatoeba (gla-Latn_eng-Latn)": 2.58, - "Tatoeba (spa-Latn_eng-Latn)": 11.26, - "Tatoeba (ber-Tfng_eng-Latn)": 4.72, - "Tatoeba (ukr-Cyrl_eng-Latn)": 0.57, - "Tatoeba (cym-Latn_eng-Latn)": 5.13, - "Tatoeba (cha-Latn_eng-Latn)": 13.07, - "Tatoeba (pol-Latn_eng-Latn)": 4.29, - "Tatoeba (fin-Latn_eng-Latn)": 3.65, - "Tatoeba (bul-Cyrl_eng-Latn)": 0.23, - "Tatoeba (tuk-Latn_eng-Latn)": 2.66, - "Tatoeba (tam-Taml_eng-Latn)": 0.33, - "Tatoeba (mar-Deva_eng-Latn)": 0.04, - "Tatoeba (vie-Latn_eng-Latn)": 5.06, - "Tatoeba (ell-Grek_eng-Latn)": 0.2, - "Tatoeba (lat-Latn_eng-Latn)": 7.14, - "Tatoeba (zsm-Latn_eng-Latn)": 5.99, - "Tatoeba (ita-Latn_eng-Latn)": 12.57, - "Tatoeba (ara-Arab_eng-Latn)": 0.43, - "Tatoeba (cor-Latn_eng-Latn)": 2.47, - "Tatoeba (oci-Latn_eng-Latn)": 8.72, - "Tatoeba (mkd-Cyrl_eng-Latn)": 0.01, - "Tatoeba (ile-Latn_eng-Latn)": 17.43, - "Tatoeba (kzj-Latn_eng-Latn)": 3.64, - "Tatoeba (cmn-Hans_eng-Latn)": 2.45, - "Tatoeba (dtp-Latn_eng-Latn)": 3.31, - "Tatoeba (hye-Armn_eng-Latn)": 0.5 + "Model": "tart-full-flan-t5-xl", + "Core17InstructionRetrieval": 2.82, + "News21InstructionRetrieval": 1.99, + "Robust04InstructionRetrieval": -0.72 } ] + } + }, + "text-embedding-3-large": { + "BitextMining": { + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "all-MiniLM-L12-v2", - "AllegroReviews (pol-Latn)": 23.85, - "AmazonCounterfactualClassification (de)": 57.1, - "AmazonCounterfactualClassification (en)": 65.28, - "AmazonCounterfactualClassification (en-ext)": 67.24, - "AmazonCounterfactualClassification (ja)": 59.91, - "AmazonCounterfactualClassification (deu-Latn)": 57.13, - "AmazonCounterfactualClassification (jpn-Jpan)": 59.94, - "AmazonPolarityClassification": 62.99, - "AmazonReviewsClassification (de)": 25.91, - "AmazonReviewsClassification (en)": 30.79, - "AmazonReviewsClassification (es)": 27.63, - "AmazonReviewsClassification (fr)": 27.54, - "AmazonReviewsClassification (ja)": 23.57, - "AmazonReviewsClassification (zh)": 22.99, - "AmazonReviewsClassification (deu-Latn)": 25.92, - "AmazonReviewsClassification (spa-Latn)": 27.64, - "AmazonReviewsClassification (fra-Latn)": 27.53, - "AmazonReviewsClassification (jpn-Jpan)": 23.57, - "AmazonReviewsClassification (cmn-Hans)": 22.99, - "AngryTweetsClassification (dan-Latn)": 42.87, - "Banking77Classification": 80.41, - "CBD (pol-Latn)": 48.46, - "DanishPoliticalCommentsClassification (dan-Latn)": 27.07, - "EmotionClassification": 41.17, - "GeoreviewClassification (rus-Cyrl)": 23.49, - "HeadlineClassification (rus-Cyrl)": 28.49, - "IFlyTek (cmn-Hans)": 15.31, - "ImdbClassification": 59.78, - "InappropriatenessClassification (rus-Cyrl)": 50.85, - "JDReview (cmn-Hans)": 59.57, - "KinopoiskClassification (rus-Cyrl)": 34.17, - "LccSentimentClassification (dan-Latn)": 41.93, - "MTOPDomainClassification (de)": 72.04, - "MTOPDomainClassification (en)": 91.88, - "MTOPDomainClassification (es)": 72.99, - "MTOPDomainClassification (fr)": 75.59, - "MTOPDomainClassification (hi)": 40.36, - "MTOPDomainClassification (th)": 17.1, - "MTOPDomainClassification (deu-Latn)": 72.04, - "MTOPDomainClassification (spa-Latn)": 72.99, - "MTOPDomainClassification (fra-Latn)": 75.57, - "MTOPDomainClassification (hin-Deva)": 40.4, - "MTOPDomainClassification (tha-Thai)": 17.1, - "MTOPIntentClassification (de)": 43.41, - "MTOPIntentClassification (en)": 62.83, - "MTOPIntentClassification (es)": 41.88, - "MTOPIntentClassification (fr)": 38.94, - "MTOPIntentClassification (hi)": 17.75, - "MTOPIntentClassification (th)": 5.63, - "MTOPIntentClassification (deu-Latn)": 43.42, - "MTOPIntentClassification (spa-Latn)": 41.91, - "MTOPIntentClassification (fra-Latn)": 38.96, - "MTOPIntentClassification (hin-Deva)": 17.76, - "MTOPIntentClassification (tha-Thai)": 5.9, - "MasakhaNEWSClassification (fra)": 72.2, - "MasakhaNEWSClassification (amh-Ethi)": 30.64, - "MasakhaNEWSClassification (eng)": 76.62, - "MasakhaNEWSClassification (fra-Latn)": 67.18, - "MasakhaNEWSClassification (hau-Latn)": 52.59, - "MasakhaNEWSClassification (ibo-Latn)": 54.26, - "MasakhaNEWSClassification (lin-Latn)": 62.23, - "MasakhaNEWSClassification (lug-Latn)": 47.62, - "MasakhaNEWSClassification (orm-Ethi)": 47.17, - "MasakhaNEWSClassification (pcm-Latn)": 91.77, - "MasakhaNEWSClassification (run-Latn)": 54.47, - "MasakhaNEWSClassification (sna-Latn)": 66.53, - "MasakhaNEWSClassification (som-Latn)": 40.27, - "MasakhaNEWSClassification (swa-Latn)": 47.77, - "MasakhaNEWSClassification (tir-Ethi)": 21.18, - "MasakhaNEWSClassification (xho-Latn)": 54.34, - "MasakhaNEWSClassification (yor-Latn)": 58.61, - "MassiveIntentClassification (af)": 38.94, - "MassiveIntentClassification (am)": 2.45, - "MassiveIntentClassification (ar)": 20.94, - "MassiveIntentClassification (az)": 34.25, - "MassiveIntentClassification (bn)": 13.67, - "MassiveIntentClassification (cy)": 35.71, - "MassiveIntentClassification (da)": 44.43, - "MassiveIntentClassification (de)": 44.17, - "MassiveIntentClassification (el)": 28.7, - "MassiveIntentClassification (en)": 67.11, - "MassiveIntentClassification (es)": 40.91, - "MassiveIntentClassification (fa)": 23.52, - "MassiveIntentClassification (fi)": 39.27, - "MassiveIntentClassification (fr)": 44.82, - "MassiveIntentClassification (he)": 23.65, - "MassiveIntentClassification (hi)": 17.98, - "MassiveIntentClassification (hu)": 38.0, - "MassiveIntentClassification (hy)": 8.69, - "MassiveIntentClassification (id)": 39.66, - "MassiveIntentClassification (is)": 35.14, - "MassiveIntentClassification (it)": 43.17, - "MassiveIntentClassification (ja)": 30.94, - "MassiveIntentClassification (jv)": 36.69, - "MassiveIntentClassification (ka)": 9.17, - "MassiveIntentClassification (km)": 4.99, - "MassiveIntentClassification (kn)": 3.08, - "MassiveIntentClassification (ko)": 19.97, - "MassiveIntentClassification (lv)": 38.61, - "MassiveIntentClassification (ml)": 2.85, - "MassiveIntentClassification (mn)": 23.25, - "MassiveIntentClassification (ms)": 36.21, - "MassiveIntentClassification (my)": 4.38, - "MassiveIntentClassification (nb)": 41.91, - "MassiveIntentClassification (nl)": 41.85, - "MassiveIntentClassification (pl)": 37.63, - "MassiveIntentClassification (pt)": 45.12, - "MassiveIntentClassification (ro)": 41.71, - "MassiveIntentClassification (ru)": 26.33, - "MassiveIntentClassification (sl)": 38.52, - "MassiveIntentClassification (sq)": 41.62, - "MassiveIntentClassification (sv)": 40.42, - "MassiveIntentClassification (sw)": 35.28, - "MassiveIntentClassification (ta)": 13.1, - "MassiveIntentClassification (te)": 2.56, - "MassiveIntentClassification (th)": 10.54, - "MassiveIntentClassification (tl)": 38.56, - "MassiveIntentClassification (tr)": 35.9, - "MassiveIntentClassification (ur)": 16.18, - "MassiveIntentClassification (vi)": 37.38, - "MassiveIntentClassification (zh-CN)": 23.74, - "MassiveIntentClassification (zh-TW)": 22.39, - "MassiveIntentClassification (jpn-Jpan)": 30.89, - "MassiveIntentClassification (cmo-Hans)": 23.74, - "MassiveIntentClassification (nob-Latn)": 41.79, - "MassiveIntentClassification (urd-Arab)": 16.26, - "MassiveIntentClassification (kan-Knda)": 3.07, - "MassiveIntentClassification (rus-Cyrl)": 26.29, - "MassiveIntentClassification (deu-Latn)": 44.12, - "MassiveIntentClassification (aze-Latn)": 34.3, - "MassiveIntentClassification (hun-Latn)": 37.95, - "MassiveIntentClassification (mal-Mlym)": 2.84, - "MassiveIntentClassification (tur-Latn)": 35.93, - "MassiveIntentClassification (kor-Kore)": 19.97, - "MassiveIntentClassification (mon-Cyrl)": 23.27, - "MassiveIntentClassification (tam-Taml)": 13.12, - "MassiveIntentClassification (fra-Latn)": 44.75, - "MassiveIntentClassification (ell-Grek)": 28.68, - "MassiveIntentClassification (swa-Latn)": 35.26, - "MassiveIntentClassification (swe-Latn)": 40.33, - "MassiveIntentClassification (vie-Latn)": 37.35, - "MassiveIntentClassification (msa-Latn)": 36.16, - "MassiveIntentClassification (hin-Deva)": 18.0, - "MassiveIntentClassification (hye-Armn)": 8.69, - "MassiveIntentClassification (pol-Latn)": 37.59, - "MassiveIntentClassification (por-Latn)": 45.08, - "MassiveIntentClassification (fin-Latn)": 39.19, - "MassiveIntentClassification (ara-Arab)": 21.02, - "MassiveIntentClassification (dan-Latn)": 44.35, - "MassiveIntentClassification (afr-Latn)": 38.84, - "MassiveIntentClassification (sqi-Latn)": 41.47, - "MassiveIntentClassification (amh-Ethi)": 2.45, - "MassiveIntentClassification (nld-Latn)": 41.77, - "MassiveIntentClassification (spa-Latn)": 40.82, - "MassiveIntentClassification (ita-Latn)": 43.16, - "MassiveIntentClassification (khm-Khmr)": 4.98, - "MassiveIntentClassification (ind-Latn)": 39.65, - "MassiveIntentClassification (ben-Beng)": 13.7, - "MassiveIntentClassification (isl-Latn)": 35.17, - "MassiveIntentClassification (jav-Latn)": 36.67, - "MassiveIntentClassification (kat-Geor)": 9.17, - "MassiveIntentClassification (ron-Latn)": 41.64, - "MassiveIntentClassification (slv-Latn)": 38.48, - "MassiveIntentClassification (cmo-Hant)": 22.38, - "MassiveIntentClassification (tgl-Latn)": 38.63, - "MassiveIntentClassification (fas-Arab)": 23.56, - "MassiveIntentClassification (tel-Telu)": 2.54, - "MassiveIntentClassification (lav-Latn)": 38.54, - "MassiveIntentClassification (mya-Mymr)": 4.36, - "MassiveIntentClassification (tha-Thai)": 10.46, - "MassiveIntentClassification (heb-Hebr)": 23.71, - "MassiveIntentClassification (cym-Latn)": 35.65, - "MassiveScenarioClassification (af)": 45.71, - "MassiveScenarioClassification (am)": 7.41, - "MassiveScenarioClassification (ar)": 27.62, - "MassiveScenarioClassification (az)": 39.58, - "MassiveScenarioClassification (bn)": 18.98, - "MassiveScenarioClassification (cy)": 41.4, - "MassiveScenarioClassification (da)": 49.47, - "MassiveScenarioClassification (de)": 52.07, - "MassiveScenarioClassification (el)": 35.51, - "MassiveScenarioClassification (en)": 74.57, - "MassiveScenarioClassification (es)": 50.74, - "MassiveScenarioClassification (fa)": 29.0, - "MassiveScenarioClassification (fi)": 45.8, - "MassiveScenarioClassification (fr)": 53.76, - "MassiveScenarioClassification (he)": 25.68, - "MassiveScenarioClassification (hi)": 23.02, - "MassiveScenarioClassification (hu)": 44.09, - "MassiveScenarioClassification (hy)": 14.83, - "MassiveScenarioClassification (id)": 44.35, - "MassiveScenarioClassification (is)": 43.08, - "MassiveScenarioClassification (it)": 51.71, - "MassiveScenarioClassification (ja)": 36.75, - "MassiveScenarioClassification (jv)": 44.57, - "MassiveScenarioClassification (ka)": 14.84, - "MassiveScenarioClassification (km)": 9.75, - "MassiveScenarioClassification (kn)": 8.32, - "MassiveScenarioClassification (ko)": 25.72, - "MassiveScenarioClassification (lv)": 42.75, - "MassiveScenarioClassification (ml)": 7.25, - "MassiveScenarioClassification (mn)": 29.03, - "MassiveScenarioClassification (ms)": 44.65, - "MassiveScenarioClassification (my)": 10.07, - "MassiveScenarioClassification (nb)": 47.36, - "MassiveScenarioClassification (nl)": 49.15, - "MassiveScenarioClassification (pl)": 44.72, - "MassiveScenarioClassification (pt)": 53.0, - "MassiveScenarioClassification (ro)": 49.97, - "MassiveScenarioClassification (ru)": 28.75, - "MassiveScenarioClassification (sl)": 42.26, - "MassiveScenarioClassification (sq)": 49.14, - "MassiveScenarioClassification (sv)": 46.83, - "MassiveScenarioClassification (sw)": 43.18, - "MassiveScenarioClassification (ta)": 19.38, - "MassiveScenarioClassification (te)": 7.74, - "MassiveScenarioClassification (th)": 18.32, - "MassiveScenarioClassification (tl)": 48.31, - "MassiveScenarioClassification (tr)": 41.79, - "MassiveScenarioClassification (ur)": 24.46, - "MassiveScenarioClassification (vi)": 40.94, - "MassiveScenarioClassification (zh-CN)": 33.18, - "MassiveScenarioClassification (zh-TW)": 31.16, - "MassiveScenarioClassification (por-Latn)": 53.0, - "MassiveScenarioClassification (cmo-Hant)": 31.14, - "MassiveScenarioClassification (swe-Latn)": 46.81, - "MassiveScenarioClassification (fas-Arab)": 29.0, - "MassiveScenarioClassification (lav-Latn)": 42.75, - "MassiveScenarioClassification (fra-Latn)": 53.77, - "MassiveScenarioClassification (mal-Mlym)": 7.25, - "MassiveScenarioClassification (isl-Latn)": 43.11, - "MassiveScenarioClassification (sqi-Latn)": 49.12, - "MassiveScenarioClassification (amh-Ethi)": 7.41, - "MassiveScenarioClassification (cym-Latn)": 41.43, - "MassiveScenarioClassification (ron-Latn)": 49.94, - "MassiveScenarioClassification (tha-Thai)": 18.32, - "MassiveScenarioClassification (kan-Knda)": 8.32, - "MassiveScenarioClassification (urd-Arab)": 24.45, - "MassiveScenarioClassification (ind-Latn)": 44.37, - "MassiveScenarioClassification (ell-Grek)": 35.55, - "MassiveScenarioClassification (heb-Hebr)": 25.73, - "MassiveScenarioClassification (mya-Mymr)": 10.06, - "MassiveScenarioClassification (deu-Latn)": 52.08, - "MassiveScenarioClassification (hin-Deva)": 23.03, - "MassiveScenarioClassification (hun-Latn)": 44.1, - "MassiveScenarioClassification (tgl-Latn)": 48.29, - "MassiveScenarioClassification (vie-Latn)": 40.97, - "MassiveScenarioClassification (ben-Beng)": 18.98, - "MassiveScenarioClassification (kat-Geor)": 14.85, - "MassiveScenarioClassification (hye-Armn)": 14.87, - "MassiveScenarioClassification (pol-Latn)": 44.74, - "MassiveScenarioClassification (swa-Latn)": 43.18, - "MassiveScenarioClassification (ita-Latn)": 51.7, - "MassiveScenarioClassification (tur-Latn)": 41.8, - "MassiveScenarioClassification (nld-Latn)": 49.14, - "MassiveScenarioClassification (aze-Latn)": 39.62, - "MassiveScenarioClassification (dan-Latn)": 49.5, - "MassiveScenarioClassification (spa-Latn)": 50.73, - "MassiveScenarioClassification (ara-Arab)": 27.66, - "MassiveScenarioClassification (slv-Latn)": 42.24, - "MassiveScenarioClassification (jav-Latn)": 44.54, - "MassiveScenarioClassification (msa-Latn)": 44.67, - "MassiveScenarioClassification (fin-Latn)": 45.8, - "MassiveScenarioClassification (khm-Khmr)": 9.75, - "MassiveScenarioClassification (rus-Cyrl)": 28.77, - "MassiveScenarioClassification (mon-Cyrl)": 29.01, - "MassiveScenarioClassification (tam-Taml)": 19.4, - "MassiveScenarioClassification (tel-Telu)": 7.74, - "MassiveScenarioClassification (jpn-Jpan)": 36.77, - "MassiveScenarioClassification (kor-Kore)": 25.72, - "MassiveScenarioClassification (cmo-Hans)": 33.19, - "MassiveScenarioClassification (nob-Latn)": 47.35, - "MassiveScenarioClassification (afr-Latn)": 45.72, - "MultilingualSentiment (cmn-Hans)": 40.52, - "NoRecClassification (nob-Latn)": 37.73, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 54.17, - "OnlineShopping (cmn-Hans)": 58.65, - "PAC (pol-Latn)": 59.53, - "PolEmo2.0-IN (pol-Latn)": 38.32, - "PolEmo2.0-OUT (pol-Latn)": 22.98, - "RuReviewsClassification (rus-Cyrl)": 42.49, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 10.49, - "RuSciBenchOECDClassification (rus-Cyrl)": 8.31, - "TNews (cmn-Hans)": 20.37, - "ToxicConversationsClassification": 63.34, - "TweetSentimentExtractionClassification": 54.24, - "Waimai (cmn-Hans)": 63.48 + "Model": "text-embedding-3-large", + "AmazonCounterfactualClassification": 78.93, + "AmazonPolarityClassification": 92.85, + "AmazonReviewsClassification": 48.7, + "Banking77Classification": 85.69, + "EmotionClassification": 51.58, + "ImdbClassification": 87.67, + "MTOPDomainClassification": 95.36, + "MTOPIntentClassification": 75.07, + "MassiveIntentClassification": 74.64, + "MassiveScenarioClassification": 79.79, + "ToxicConversationsClassification": 72.92, + "TweetSentimentExtractionClassification": 62.22 } ] }, "Clustering": { "v_measure": [ { - "Model": "all-MiniLM-L12-v2", - "AlloProfClusteringP2P": 46.03, - "AlloProfClusteringS2S": 31.83, - "ArxivClusteringP2P": 46.07, - "ArxivClusteringS2S": 37.5, - "BiorxivClusteringP2P": 36.99, - "BiorxivClusteringS2S": 33.21, - "GeoreviewClusteringP2P (rus-Cyrl)": 20.51, - "HALClusteringS2S": 19.58, - "MLSUMClusteringP2P": 34.35, - "MLSUMClusteringS2S": 29.3, - "MasakhaNEWSClusteringP2P (fra)": 42.72, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 40.5, - "MasakhaNEWSClusteringP2P (eng)": 55.86, - "MasakhaNEWSClusteringP2P (fra-Latn)": 42.72, - "MasakhaNEWSClusteringP2P (hau-Latn)": 26.61, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 44.26, - "MasakhaNEWSClusteringP2P (lin-Latn)": 54.52, - "MasakhaNEWSClusteringP2P (lug-Latn)": 43.87, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 24.87, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 74.42, - "MasakhaNEWSClusteringP2P (run-Latn)": 51.73, - "MasakhaNEWSClusteringP2P (sna-Latn)": 46.89, - "MasakhaNEWSClusteringP2P (som-Latn)": 31.17, - "MasakhaNEWSClusteringP2P (swa-Latn)": 23.72, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 44.08, - "MasakhaNEWSClusteringP2P (xho-Latn)": 26.97, - "MasakhaNEWSClusteringP2P (yor-Latn)": 32.51, - "MasakhaNEWSClusteringS2S (fra)": 32.47, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.11, - "MasakhaNEWSClusteringS2S (eng)": 40.71, - "MasakhaNEWSClusteringS2S (fra-Latn)": 32.47, - "MasakhaNEWSClusteringS2S (hau-Latn)": 20.63, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 35.33, - "MasakhaNEWSClusteringS2S (lin-Latn)": 54.52, - "MasakhaNEWSClusteringS2S (lug-Latn)": 51.42, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 24.84, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 70.72, - "MasakhaNEWSClusteringS2S (run-Latn)": 50.88, - "MasakhaNEWSClusteringS2S (sna-Latn)": 46.6, - "MasakhaNEWSClusteringS2S (som-Latn)": 29.87, - "MasakhaNEWSClusteringS2S (swa-Latn)": 10.82, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 43.63, - "MasakhaNEWSClusteringS2S (xho-Latn)": 24.55, - "MasakhaNEWSClusteringS2S (yor-Latn)": 32.85, - "MedrxivClusteringP2P": 34.25, - "MedrxivClusteringS2S": 32.24, - "RedditClustering": 51.18, - "RedditClusteringP2P": 54.8, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 10.65, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 10.19, - "StackExchangeClustering": 53.05, - "StackExchangeClusteringP2P": 33.13, - "TwentyNewsgroupsClustering": 47.47 + "Model": "text-embedding-3-large", + "ArxivClusteringP2P": 49.01, + "ArxivClusteringS2S": 44.45, + "BiorxivClusteringP2P": 38.03, + "BiorxivClusteringS2S": 36.53, + "MedrxivClusteringP2P": 32.7, + "MedrxivClusteringS2S": 31.27, + "RedditClustering": 67.84, + "RedditClusteringP2P": 67.96, + "StackExchangeClustering": 76.26, + "StackExchangeClusteringP2P": 36.88, + "TwentyNewsgroupsClustering": 58.14 } ] }, "PairClassification": { "max_ap": [ { - "Model": "all-MiniLM-L12-v2", - "CDSC-E (pol-Latn)": 49.04, - "OpusparcusPC (deu-Latn)": 91.2, - "OpusparcusPC (en)": 97.41, - "OpusparcusPC (fin-Latn)": 85.99, - "OpusparcusPC (fra-Latn)": 87.35, - "OpusparcusPC (rus-Cyrl)": 79.23, - "OpusparcusPC (swe-Latn)": 84.87, - "PSC (pol-Latn)": 87.92, - "PawsXPairClassification (deu-Latn)": 50.83, - "PawsXPairClassification (en)": 58.62, - "PawsXPairClassification (spa-Latn)": 52.08, - "PawsXPairClassification (fra-Latn)": 55.54, - "PawsXPairClassification (jpn-Hira)": 47.75, - "PawsXPairClassification (kor-Hang)": 49.59, - "PawsXPairClassification (cmn-Hans)": 52.8, - "SICK-E-PL (pol-Latn)": 49.63, - "SprintDuplicateQuestions": 92.45, - "TERRa (rus-Cyrl)": 46.4, - "TwitterSemEval2015": 70.02, - "TwitterURLCorpus": 84.77 - }, - { - "Model": "all-MiniLM-L12-v2", - "CDSC-E (pol-Latn)": 49.04, - "OpusparcusPC (deu-Latn)": 91.2, - "OpusparcusPC (en)": 97.41, - "OpusparcusPC (fin-Latn)": 85.99, - "OpusparcusPC (fra-Latn)": 87.35, - "OpusparcusPC (rus-Cyrl)": 79.23, - "OpusparcusPC (swe-Latn)": 84.87, - "PSC (pol-Latn)": 87.92, - "PawsXPairClassification (deu-Latn)": 51.09, - "PawsXPairClassification (en)": 58.7, - "PawsXPairClassification (spa-Latn)": 52.08, - "PawsXPairClassification (fra-Latn)": 55.59, - "PawsXPairClassification (jpn-Hira)": 47.98, - "PawsXPairClassification (kor-Hang)": 49.59, - "PawsXPairClassification (cmn-Hans)": 53.01, - "SICK-E-PL (pol-Latn)": 49.63, - "SprintDuplicateQuestions": 92.58, - "TERRa (rus-Cyrl)": 46.4, - "TwitterSemEval2015": 70.02, - "TwitterURLCorpus": 84.77 + "Model": "text-embedding-3-large", + "SprintDuplicateQuestions": 92.25, + "TwitterSemEval2015": 77.13, + "TwitterURLCorpus": 87.78 } ] }, "Reranking": { "map": [ { - "Model": "all-MiniLM-L12-v2", - "AlloprofReranking (fra-Latn)": 67.01, - "AskUbuntuDupQuestions": 64.06, - "MMarcoReranking (cmn-Hans)": 5.27, - "MindSmallReranking": 31.02, - "RuBQReranking (rus-Cyrl)": 38.51, - "SciDocsRR": 87.2, - "StackOverflowDupQuestions": 51.47, - "SyntecReranking (fra-Latn)": 69.17, - "T2Reranking (cmn-Hans)": 60.32 + "Model": "text-embedding-3-large", + "AskUbuntuDupQuestions": 65.03, + "MindSmallReranking": 29.86, + "SciDocsRR": 86.66, + "StackOverflowDupQuestions": 55.08 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-large", + "AILACasedocs": 39.0, + "AILAStatutes": 41.31, + "ARCChallenge": 23.98, + "AlphaNLI": 37.27, + "ArguAna": 58.05, + "BrightRetrieval (theoremqa_questions)": 22.22, + "BrightRetrieval (leetcode)": 23.65, + "BrightRetrieval (earth_science)": 26.27, + "BrightRetrieval (psychology)": 27.52, + "BrightRetrieval (robotics)": 12.93, + "BrightRetrieval (economics)": 19.98, + "BrightRetrieval (stackoverflow)": 12.49, + "BrightRetrieval (biology)": 23.67, + "BrightRetrieval (theoremqa_theorems)": 10.82, + "BrightRetrieval (pony)": 2.45, + "BrightRetrieval (sustainable_living)": 20.32, + "BrightRetrieval (aops)": 8.45, + "CQADupstackRetrieval": 47.54, + "ClimateFEVER": 30.27, + "DBPedia": 44.76, + "FEVER": 87.94, + "FiQA2018": 55.0, + "GerDaLIRSmall": 32.77, + "HellaSwag": 34.12, + "HotpotQA": 71.58, + "LEMBNarrativeQARetrieval": 44.09, + "LEMBQMSumRetrieval": 32.49, + "LEMBSummScreenFDRetrieval": 84.8, + "LEMBWikimQARetrieval": 54.16, + "LeCaRDv2": 57.2, + "LegalBenchConsumerContractsQA": 79.39, + "LegalBenchCorporateLobbying": 95.09, + "LegalQuAD": 57.47, + "LegalSummarization": 71.55, + "MSMARCO": 40.24, + "NFCorpus": 42.07, + "NQ": 61.27, + "PIQA": 41.96, + "Quail": 10.15, + "QuoraRetrieval": 89.05, + "RARbCode": 89.64, + "RARbMath": 90.08, + "SCIDOCS": 23.11, + "SIQA": 3.44, + "SciFact": 77.77, + "SpartQA": 7.51, + "TRECCOVID": 79.56, + "TempReasonL1": 2.13, + "TempReasonL2Fact": 28.65, + "TempReasonL2Pure": 10.34, + "TempReasonL3Fact": 25.52, + "TempReasonL3Pure": 15.28, + "Touche2020": 23.35, + "WinoGrande": 29.11 } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "all-MiniLM-L12-v2", - "AILACasedocs": 16.8, - "AILAStatutes": 20.71, - "ARCChallenge": 10.23, - "AlloprofRetrieval": 33.2, - "AlloprofRetrieval (fra-Latn)": 33.2, - "AlphaNLI": 25.35, - "AppsRetrieval (eng-Latn_python-Code)": 5.97, - "ArguAna": 47.13, - "ArguAna-PL (pol-Latn)": 13.4, - "BSARDRetrieval (fra-Latn)": 6.24, - "CQADupstackRetrieval": 42.53, - "ClimateFEVER": 21.57, - "CmedqaRetrieval (cmn-Hans)": 2.58, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 27.07, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 61.01, - "CodeSearchNetCCRetrieval (python-Code)": 71.1, - "CodeSearchNetCCRetrieval (javascript-Code)": 70.13, - "CodeSearchNetCCRetrieval (go-Code)": 63.52, - "CodeSearchNetCCRetrieval (ruby-Code)": 72.28, - "CodeSearchNetCCRetrieval (java-Code)": 67.51, - "CodeSearchNetCCRetrieval (php-Code)": 60.81, - "CodeSearchNetRetrieval (python-Code)": 82.09, - "CodeSearchNetRetrieval (javascript-Code)": 67.52, - "CodeSearchNetRetrieval (go-Code)": 89.06, - "CodeSearchNetRetrieval (ruby-Code)": 74.77, - "CodeSearchNetRetrieval (java-Code)": 53.2, - "CodeSearchNetRetrieval (php-Code)": 77.36, - "CodeTransOceanContest (python-Code_c++-Code)": 59.86, - "CodeTransOceanDL": 19.94, - "CosQA (eng-Latn_python-Code)": 32.09, - "CovidRetrieval (cmn-Hans)": 10.79, - "DBPedia": 33.35, - "DuRetrieval (cmn-Hans)": 6.62, - "EcomRetrieval (cmn-Hans)": 4.01, - "FEVER": 55.9, - "FiQA-PL (pol-Latn)": 5.82, - "FiQA2018": 37.27, - "GerDaLIRSmall (deu-Latn)": 1.35, - "HellaSwag": 24.08, - "HotpotQA": 44.59, - "LEMBNarrativeQARetrieval": 19.64, - "LEMBQMSumRetrieval": 13.08, - "LEMBSummScreenFDRetrieval": 46.98, - "LEMBWikimQARetrieval": 44.88, - "LeCaRDv2 (zho-Hans)": 18.77, - "LegalBenchConsumerContractsQA": 60.21, - "LegalBenchCorporateLobbying": 88.69, - "LegalQuAD (deu-Latn)": 7.44, - "LegalSummarization": 57.43, - "MMarcoRetrieval (cmn-Hans)": 7.46, - "MSMARCO": 39.03, - "MedicalRetrieval (cmn-Hans)": 2.3, - "MintakaRetrieval (ara-Arab)": 2.74, - "MintakaRetrieval (deu-Latn)": 20.04, - "MintakaRetrieval (spa-Latn)": 11.76, - "MintakaRetrieval (fra-Latn)": 16.08, - "MintakaRetrieval (hin-Deva)": 3.03, - "MintakaRetrieval (ita-Latn)": 11.83, - "MintakaRetrieval (jpn-Hira)": 7.3, - "MintakaRetrieval (por-Latn)": 13.66, - "NFCorpus": 32.25, - "NFCorpus-PL (pol-Latn)": 15.43, - "NQ": 46.47, - "PIQA": 26.44, - "Quail": 3.08, - "QuoraRetrieval": 87.75, - "RARbCode": 42.44, - "RARbMath": 66.36, - "RuBQRetrieval (rus-Cyrl)": 8.84, - "SCIDOCS": 21.82, - "SCIDOCS-PL (pol-Latn)": 5.34, - "SIQA": 2.09, - "SciFact": 62.64, - "SciFact-PL (pol-Latn)": 22.48, - "SpartQA": 2.67, - "StackOverflowQA": 80.63, - "SyntecRetrieval (fra-Latn)": 60.8, - "SyntheticText2SQL (eng-Latn_sql-Code)": 43.93, - "T2Retrieval (cmn-Hans)": 4.82, - "TRECCOVID": 50.82, - "TRECCOVID-PL (pol-Latn)": 16.52, - "TempReasonL1": 1.66, - "TempReasonL2Fact": 10.31, - "TempReasonL2Pure": 0.63, - "TempReasonL3Fact": 11.11, - "TempReasonL3Pure": 6.63, - "Touche2020": 17.22, - "VideoRetrieval (cmn-Hans)": 9.36, - "WinoGrande": 27.16, - "XPQARetrieval (fr)": 55.9, - "XPQARetrieval (ara-Arab_ara-Arab)": 7.83, - "XPQARetrieval (eng-Latn_ara-Arab)": 2.55, - "XPQARetrieval (ara-Arab_eng-Latn)": 8.88, - "XPQARetrieval (deu-Latn_deu-Latn)": 56.77, - "XPQARetrieval (eng-Latn_deu-Latn)": 18.2, - "XPQARetrieval (deu-Latn_eng-Latn)": 30.06, - "XPQARetrieval (spa-Latn_spa-Latn)": 42.22, - "XPQARetrieval (eng-Latn_spa-Latn)": 7.53, - "XPQARetrieval (spa-Latn_eng-Latn)": 26.27, - "XPQARetrieval (fra-Latn_fra-Latn)": 55.9, - "XPQARetrieval (eng-Latn_fra-Latn)": 14.89, - "XPQARetrieval (fra-Latn_eng-Latn)": 34.15, - "XPQARetrieval (hin-Deva_hin-Deva)": 33.26, - "XPQARetrieval (eng-Latn_hin-Deva)": 6.44, - "XPQARetrieval (hin-Deva_eng-Latn)": 6.98, - "XPQARetrieval (ita-Latn_ita-Latn)": 58.68, - "XPQARetrieval (eng-Latn_ita-Latn)": 8.56, - "XPQARetrieval (ita-Latn_eng-Latn)": 28.71, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 39.53, - "XPQARetrieval (eng-Latn_jpn-Hira)": 5.7, - "XPQARetrieval (jpn-Hira_eng-Latn)": 13.75, - "XPQARetrieval (kor-Hang_kor-Hang)": 13.44, - "XPQARetrieval (eng-Latn_kor-Hang)": 7.43, - "XPQARetrieval (kor-Hang_eng-Latn)": 7.4, - "XPQARetrieval (pol-Latn_pol-Latn)": 28.07, - "XPQARetrieval (eng-Latn_pol-Latn)": 10.03, - "XPQARetrieval (pol-Latn_eng-Latn)": 16.58, - "XPQARetrieval (por-Latn_por-Latn)": 34.09, - "XPQARetrieval (eng-Latn_por-Latn)": 7.38, - "XPQARetrieval (por-Latn_eng-Latn)": 22.59, - "XPQARetrieval (tam-Taml_tam-Taml)": 9.07, - "XPQARetrieval (eng-Latn_tam-Taml)": 4.15, - "XPQARetrieval (tam-Taml_eng-Latn)": 3.76, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 21.07, - "XPQARetrieval (eng-Latn_cmn-Hans)": 6.58, - "XPQARetrieval (cmn-Hans_eng-Latn)": 9.39 - }, + ], + "recall_at_1": [ { - "Model": "all-MiniLM-L12-v2", - "LEMBNeedleRetrieval": 12.25, - "LEMBPasskeyRetrieval": 14.75 + "Model": "text-embedding-3-large", + "BrightRetrieval (earth_science)": 32.26, + "BrightRetrieval (sustainable_living)": 26.34, + "BrightRetrieval (economics)": 24.76, + "BrightRetrieval (stackoverflow)": 11.54, + "BrightRetrieval (pony)": 0.0, + "BrightRetrieval (biology)": 33.09, + "BrightRetrieval (robotics)": 11.88, + "BrightRetrieval (psychology)": 35.15 } ] }, "STS": { "cosine_spearman": [ { - "Model": "all-MiniLM-L12-v2", - "AFQMC (cmn-Hans)": 7.94, - "ATEC (cmn-Hans)": 12.97, - "BIOSSES": 83.57, - "BQ (cmn-Hans)": 23.31, - "CDSC-R (pol-Latn)": 82.5, - "LCQMC (cmn-Hans)": 21.04, - "PAWSX (cmn-Hans)": 7.31, - "RUParaPhraserSTS (rus-Cyrl)": 45.47, - "RuSTSBenchmarkSTS (rus-Cyrl)": 56.33, - "SICK-R": 79.32, - "SICK-R-PL (pol-Latn)": 54.26, - "SICKFr (fra-Latn)": 63.16, - "STS12": 73.08, - "STS13": 82.13, - "STS14": 76.73, - "STS15": 85.58, - "STS16": 80.23, - "STS17 (ara-Arab)": 58.71, - "STS17 (spa-Latn)": 78.37, - "STS17 (eng-Latn_tur-Latn)": 0.43, - "STS17 (spa-Latn_eng-Latn)": 22.01, - "STS17 (ita-Latn_eng-Latn)": 24.28, - "STS17 (eng-Latn_deu-Latn)": 27.54, - "STS17 (en-en)": 88.63, - "STS17 (kor-Hang)": 43.37, - "STS17 (nld-Latn_eng-Latn)": 24.51, - "STS17 (eng-Latn_ara-Arab)": 0.54, - "STS17 (fra-Latn_eng-Latn)": 30.7, - "STS22 (deu-Latn_fra-Latn)": 43.52, - "STS22 (tur-Latn)": 21.6, - "STS22 (en)": 66.0, - "STS22 (ara-Arab)": 17.54, - "STS22 (pol-Latn_eng-Latn)": 42.67, - "STS22 (spa-Latn_ita-Latn)": 40.71, - "STS22 (pol-Latn)": 19.22, - "STS22 (fra-Latn_pol-Latn)": 16.9, - "STS22 (spa-Latn)": 43.98, - "STS22 (rus-Cyrl)": 11.19, - "STS22 (ita-Latn)": 47.48, - "STS22 (deu-Latn_eng-Latn)": 42.86, - "STS22 (deu-Latn)": 22.53, - "STS22 (cmn-Hans_eng-Latn)": 44.39, - "STS22 (deu-Latn_pol-Latn)": 1.63, - "STS22 (fra-Latn)": 69.51, - "STS22 (cmn-Hans)": 33.15, - "STS22 (spa-Latn_eng-Latn)": 53.99, - "STSB (cmn-Hans)": 36.66, - "STSBenchmark": 83.09, - "STSBenchmarkMultilingualSTS (pol-Latn)": 60.2, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 38.93, - "STSBenchmarkMultilingualSTS (en)": 83.09, - "STSBenchmarkMultilingualSTS (ita-Latn)": 60.71, - "STSBenchmarkMultilingualSTS (fra-Latn)": 66.68, - "STSBenchmarkMultilingualSTS (por-Latn)": 63.85, - "STSBenchmarkMultilingualSTS (nld-Latn)": 60.03, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 56.09, - "STSBenchmarkMultilingualSTS (deu-Latn)": 63.28, - "STSBenchmarkMultilingualSTS (spa-Latn)": 65.33 - }, - { - "Model": "all-MiniLM-L12-v2", - "STS17 (ar-ar)": 58.71, - "STS17 (en-ar)": 0.54, - "STS17 (en-de)": 27.54, - "STS17 (en-en)": 88.63, - "STS17 (en-tr)": 0.43, - "STS17 (es-en)": 22.01, - "STS17 (es-es)": 78.37, - "STS17 (fr-en)": 30.7, - "STS17 (it-en)": 24.28, - "STS17 (ko-ko)": 43.37, - "STS17 (nl-en)": 24.51, - "STS22 (ar)": 17.54, - "STS22 (de)": 22.53, - "STS22 (de-en)": 42.86, - "STS22 (de-fr)": 43.52, - "STS22 (de-pl)": 1.63, - "STS22 (en)": 65.67, - "STS22 (es)": 43.98, - "STS22 (es-en)": 53.99, - "STS22 (es-it)": 40.71, - "STS22 (fr)": 69.51, - "STS22 (fr-pl)": 16.9, - "STS22 (it)": 47.48, - "STS22 (pl)": 19.22, - "STS22 (pl-en)": 42.67, - "STS22 (ru)": 11.19, - "STS22 (tr)": 21.6, - "STS22 (zh)": 33.15, - "STS22 (zh-en)": 44.39 + "Model": "text-embedding-3-large", + "BIOSSES": 84.68, + "SICK-R": 79.0, + "STS12": 72.84, + "STS13": 86.1, + "STS14": 81.15, + "STS15": 88.49, + "STS16": 85.08, + "STS17": 90.22, + "STS22": 66.14, + "STSBenchmark": 83.56 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "all-MiniLM-L12-v2", - "SummEval": 27.9, - "SummEvalFr (fra-Latn)": 26.63 - }, - { - "Model": "all-MiniLM-L12-v2", - "SummEval": 27.91, - "SummEvalFr (fra-Latn)": 26.63 + "Model": "text-embedding-3-large", + "SummEval": 29.92 } ] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "all-MiniLM-L12-v2", - "CEDRClassification (rus-Cyrl)": 33.86, - "SensitiveTopicsClassification (rus-Cyrl)": 18.05 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "all-MiniLM-L12-v2", - "Core17InstructionRetrieval": 1.69, - "News21InstructionRetrieval": -0.35, - "Robust04InstructionRetrieval": -3.59 + "Model": "text-embedding-3-large", + "Core17InstructionRetrieval": -0.2, + "News21InstructionRetrieval": -2.03, + "Robust04InstructionRetrieval": -5.81 } ] } }, - "bge-large-en-v1.5": { + "text-embedding-3-large-256": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "text-embedding-3-large-256", + "AmazonCounterfactualClassification": 73.96, + "AmazonPolarityClassification": 91.32, + "AmazonReviewsClassification": 46.03, + "Banking77Classification": 83.19, + "EmotionClassification": 45.8, + "ImdbClassification": 85.93, + "MTOPDomainClassification": 92.76, + "MTOPIntentClassification": 70.45, + "MassiveIntentClassification": 71.12, + "MassiveScenarioClassification": 75.56, + "ToxicConversationsClassification": 68.52, + "TweetSentimentExtractionClassification": 58.98 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "text-embedding-3-large-256", + "ArxivClusteringP2P": 47.05, + "ArxivClusteringS2S": 42.59, + "BiorxivClusteringP2P": 35.43, + "BiorxivClusteringS2S": 33.86, + "MedrxivClusteringP2P": 32.1, + "MedrxivClusteringS2S": 31.15, + "RedditClustering": 60.18, + "RedditClusteringP2P": 64.71, + "StackExchangeClustering": 71.23, + "StackExchangeClusteringP2P": 35.95, + "TwentyNewsgroupsClustering": 54.24 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "text-embedding-3-large-256", + "SprintDuplicateQuestions": 89.02, + "TwitterSemEval2015": 76.56, + "TwitterURLCorpus": 87.09 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "text-embedding-3-large-256", + "AskUbuntuDupQuestions": 64.61, + "MindSmallReranking": 29.63, + "SciDocsRR": 84.25, + "StackOverflowDupQuestions": 53.46 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-large-en-v1.5", - "AILACasedocs": 25.15, - "AILAStatutes": 20.74, - "ARCChallenge": 9.99, - "AlphaNLI": 13.13, - "BrightRetrieval (stackoverflow)": 9.51, - "BrightRetrieval (earth_science)": 24.15, - "BrightRetrieval (aops)": 6.08, - "BrightRetrieval (sustainable_living)": 13.27, - "BrightRetrieval (psychology)": 17.44, - "BrightRetrieval (robotics)": 12.21, - "BrightRetrieval (theoremqa_theorems)": 6.72, - "BrightRetrieval (pony)": 5.64, - "BrightRetrieval (biology)": 11.96, - "BrightRetrieval (theoremqa_questions)": 12.56, - "BrightRetrieval (leetcode)": 26.68, - "BrightRetrieval (economics)": 16.59, - "GerDaLIRSmall": 3.96, - "HellaSwag": 28.5, - "LeCaRDv2": 22.68, - "LegalBenchConsumerContractsQA": 73.52, - "LegalBenchCorporateLobbying": 91.51, - "LegalQuAD": 16.22, - "LegalSummarization": 59.99, - "PIQA": 27.99, - "Quail": 1.83, - "RARbCode": 48.12, - "RARbMath": 57.36, - "SIQA": 1.04, - "SpartQA": 2.99, - "TempReasonL1": 1.46, - "TempReasonL2Fact": 24.25, - "TempReasonL2Pure": 2.35, - "TempReasonL3Fact": 20.64, - "TempReasonL3Pure": 6.67, - "WinoGrande": 19.18 + "Model": "text-embedding-3-large-256", + "ArguAna": 55.6, + "CQADupstackRetrieval": 42.28, + "ClimateFEVER": 25.8, + "DBPedia": 40.8, + "FEVER": 84.57, + "FiQA2018": 50.33, + "HotpotQA": 62.69, + "MSMARCO": 37.93, + "NFCorpus": 37.94, + "NQ": 56.64, + "QuoraRetrieval": 88.22, + "SCIDOCS": 20.44, + "SciFact": 73.1, + "TRECCOVID": 76.24, + "Touche2020": 22.31 } - ], - "recall_at_1": [ + ] + }, + "STS": { + "cosine_spearman": [ { - "Model": "bge-large-en-v1.5", - "BrightRetrieval (pony)": 0.36, - "BrightRetrieval (psychology)": 11.58, - "BrightRetrieval (stackoverflow)": 13.25, - "BrightRetrieval (robotics)": 10.89, - "BrightRetrieval (earth_science)": 27.73, - "BrightRetrieval (biology)": 16.42, - "BrightRetrieval (economics)": 20.87, - "BrightRetrieval (sustainable_living)": 16.9 + "Model": "text-embedding-3-large-256", + "BIOSSES": 84.87, + "SICK-R": 79.18, + "STS12": 71.98, + "STS13": 85.52, + "STS14": 80.5, + "STS15": 87.51, + "STS16": 84.48, + "STS17": 88.11, + "STS22": 65.92, + "STSBenchmark": 82.34 } ] }, - "STS": { - "cosine_spearman": [] - }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "text-embedding-3-large-256", + "SummEval": 29.92 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -20536,7 +19604,7 @@ "p-MRR": [] } }, - "instructor-large": { + "text-embedding-3-large-instruct": { "BitextMining": { "f1": [] }, @@ -20555,32 +19623,22 @@ "Retrieval": { "ndcg_at_10": [ { - "Model": "instructor-large", - "BrightRetrieval (pony)": 1.32, - "BrightRetrieval (sustainable_living)": 13.16, - "BrightRetrieval (aops)": 7.94, - "BrightRetrieval (biology)": 15.61, - "BrightRetrieval (stackoverflow)": 11.21, - "BrightRetrieval (theoremqa_theorems)": 8.27, - "BrightRetrieval (psychology)": 21.94, - "BrightRetrieval (economics)": 15.99, - "BrightRetrieval (robotics)": 11.45, - "BrightRetrieval (leetcode)": 20.0, - "BrightRetrieval (earth_science)": 21.52, - "BrightRetrieval (theoremqa_questions)": 20.07 - } - ], - "recall_at_1": [ - { - "Model": "instructor-large", - "BrightRetrieval (stackoverflow)": 14.53, - "BrightRetrieval (pony)": 3.94, - "BrightRetrieval (economics)": 14.08, - "BrightRetrieval (earth_science)": 29.45, - "BrightRetrieval (sustainable_living)": 25.42, - "BrightRetrieval (psychology)": 21.29, - "BrightRetrieval (robotics)": 12.87, - "BrightRetrieval (biology)": 24.11 + "Model": "text-embedding-3-large-instruct", + "ARCChallenge": 21.22, + "AlphaNLI": 34.23, + "HellaSwag": 31.4, + "PIQA": 37.52, + "Quail": 13.6, + "RARbCode": 89.41, + "RARbMath": 87.73, + "SIQA": 4.99, + "SpartQA": 7.45, + "TempReasonL1": 2.07, + "TempReasonL2Fact": 39.77, + "TempReasonL2Pure": 11.04, + "TempReasonL3Fact": 37.04, + "TempReasonL3Pure": 15.51, + "WinoGrande": 33.92 } ] }, @@ -20597,87 +19655,132 @@ "p-MRR": [] } }, - "udever-bloom-1b1": { + "text-embedding-3-small": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "udever-bloom-1b1", - "AmazonReviewsClassification (fr)": 35.12, - "MTOPDomainClassification (fr)": 69.24, - "MTOPIntentClassification (fr)": 51.25, - "MasakhaNEWSClassification (fra)": 80.83, - "MassiveIntentClassification (fr)": 43.21, - "MassiveScenarioClassification (fr)": 49.78 + "Model": "text-embedding-3-small", + "AmazonCounterfactualClassification": 76.42, + "AmazonPolarityClassification": 90.84, + "AmazonReviewsClassification": 45.73, + "Banking77Classification": 83.01, + "EmotionClassification": 50.63, + "ImdbClassification": 83.66, + "MTOPDomainClassification": 93.91, + "MTOPIntentClassification": 70.98, + "MassiveIntentClassification": 72.86, + "MassiveScenarioClassification": 76.84, + "ToxicConversationsClassification": 71.91, + "TweetSentimentExtractionClassification": 61.72 } ] }, "Clustering": { "v_measure": [ { - "Model": "udever-bloom-1b1", - "AlloProfClusteringP2P": 62.22, - "AlloProfClusteringS2S": 27.06, - "HALClusteringS2S": 13.86, - "MLSUMClusteringP2P": 44.11, - "MLSUMClusteringS2S": 30.47, - "MasakhaNEWSClusteringP2P (fra)": 40.2, - "MasakhaNEWSClusteringS2S (fra)": 27.35 + "Model": "text-embedding-3-small", + "ArxivClusteringP2P": 46.57, + "ArxivClusteringS2S": 39.35, + "BiorxivClusteringP2P": 37.77, + "BiorxivClusteringS2S": 34.68, + "MedrxivClusteringP2P": 32.77, + "MedrxivClusteringS2S": 31.85, + "RedditClustering": 64.09, + "RedditClusteringP2P": 65.12, + "StackExchangeClustering": 72.05, + "StackExchangeClusteringP2P": 34.04, + "TwentyNewsgroupsClustering": 54.81 } ] }, "PairClassification": { "max_ap": [ { - "Model": "udever-bloom-1b1", - "OpusparcusPC (fr)": 85.54, - "PawsXPairClassification (fr)": 61.99 + "Model": "text-embedding-3-small", + "OpusparcusPC": 94.45, + "SprintDuplicateQuestions": 94.58, + "TwitterSemEval2015": 73.33, + "TwitterURLCorpus": 87.21 }, { - "Model": "udever-bloom-1b1", - "OpusparcusPC (fr)": 90.15, - "PawsXPairClassification (fr)": 63.95 + "Model": "text-embedding-3-small", + "OpusparcusPC": 94.45 } ] }, "Reranking": { "map": [ { - "Model": "udever-bloom-1b1", - "AlloprofReranking": 39.13, - "SyntecReranking": 62.58 + "Model": "text-embedding-3-small", + "AskUbuntuDupQuestions": 62.18, + "MindSmallReranking": 29.93, + "SciDocsRR": 83.25, + "StackOverflowDupQuestions": 51.53 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "udever-bloom-1b1", - "AlloprofRetrieval": 12.37, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 2.78, - "SyntecRetrieval": 40.57, - "XPQARetrieval (fr)": 33.82 + "Model": "text-embedding-3-small", + "ARCChallenge": 14.63, + "AlphaNLI": 30.61, + "ArguAna": 55.49, + "CQADupstackRetrieval": 42.58, + "ClimateFEVER": 26.86, + "DBPedia": 39.97, + "FEVER": 79.42, + "FiQA2018": 44.91, + "HellaSwag": 30.94, + "HotpotQA": 63.63, + "MSMARCO": 37.02, + "NFCorpus": 38.33, + "NQ": 52.86, + "PIQA": 33.69, + "Quail": 6.11, + "QuoraRetrieval": 88.83, + "RARbCode": 72.03, + "RARbMath": 71.07, + "SCIDOCS": 20.8, + "SIQA": 3.03, + "SciFact": 73.37, + "SpartQA": 6.63, + "TRECCOVID": 77.9, + "TempReasonL1": 2.35, + "TempReasonL2Fact": 25.68, + "TempReasonL2Pure": 2.76, + "TempReasonL3Fact": 22.09, + "TempReasonL3Pure": 9.79, + "Touche2020": 24.28, + "WinoGrande": 31.53 } ] }, "STS": { "cosine_spearman": [ { - "Model": "udever-bloom-1b1", - "SICKFr": 59.94, - "STS22 (fr)": 77.1, - "STSBenchmarkMultilingualSTS (fr)": 49.97 + "Model": "text-embedding-3-small", + "BIOSSES": 88.72, + "SICK-R": 76.73, + "STS12": 73.09, + "STS13": 84.92, + "STS14": 79.81, + "STS15": 88.01, + "STS16": 84.41, + "STS17": 90.94, + "STS22": 64.96, + "STSBenchmark": 84.24 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "udever-bloom-1b1", - "SummEvalFr": 29.48 + "Model": "text-embedding-3-small", + "SummEval": 31.12 } ] }, @@ -20688,96 +19791,238 @@ "p-MRR": [] } }, - "voyage-multilingual-2": { + "text-embedding-3-small-instruct": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-small-instruct", + "ARCChallenge": 13.76, + "AlphaNLI": 21.14, + "HellaSwag": 27.2, + "PIQA": 29.59, + "Quail": 6.64, + "RARbCode": 72.14, + "RARbMath": 64.31, + "SIQA": 2.98, + "SpartQA": 3.58, + "TempReasonL1": 2.29, + "TempReasonL2Fact": 26.34, + "TempReasonL2Pure": 3.17, + "TempReasonL3Fact": 22.72, + "TempReasonL3Pure": 9.98, + "WinoGrande": 25.49 + } + ] + }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "text-embedding-ada-002": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "voyage-multilingual-2", - "AmazonReviewsClassification (fr)": 43.36, - "MTOPDomainClassification (fr)": 90.33, - "MTOPIntentClassification (fr)": 60.52, - "MasakhaNEWSClassification (fra)": 74.81, - "MassiveIntentClassification (fr)": 68.06, - "MassiveScenarioClassification (fr)": 74.29 + "Model": "text-embedding-ada-002", + "AmazonCounterfactualClassification": 75.94, + "AmazonPolarityClassification": 86.72, + "AmazonReviewsClassification": 43.76, + "Banking77Classification": 80.66, + "EmotionClassification": 48.74, + "IFlyTek": 44.62, + "ImdbClassification": 77.98, + "JDReview": 74.6, + "MTOPDomainClassification": 89.38, + "MTOPIntentClassification": 64.45, + "MasakhaNEWSClassification": 81.52, + "MassiveIntentClassification": 65.42, + "MassiveScenarioClassification": 71.11, + "MultilingualSentiment": 67.99, + "OnlineShopping": 88.94, + "TNews": 45.77, + "ToxicConversationsClassification": 72.29, + "TweetSentimentExtractionClassification": 61.81, + "Waimai": 82.37 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-multilingual-2", - "AlloProfClusteringP2P": 65.37, - "AlloProfClusteringS2S": 47.03, - "HALClusteringS2S": 27.67, - "MLSUMClusteringP2P (fr)": 45.99, - "MLSUMClusteringS2S (fr)": 45.57, - "MasakhaNEWSClusteringP2P (fra)": 44.53, - "MasakhaNEWSClusteringS2S (fra)": 49.8 + "Model": "text-embedding-ada-002", + "AlloProfClusteringP2P": 64.83, + "AlloProfClusteringS2S": 53.52, + "ArxivClusteringP2P": 45.01, + "ArxivClusteringS2S": 36.85, + "BiorxivClusteringP2P": 36.66, + "BiorxivClusteringS2S": 34.21, + "CLSClusteringP2P": 38.26, + "CLSClusteringS2S": 35.91, + "HALClusteringS2S": 26.18, + "MLSUMClusteringP2P": 44.59, + "MLSUMClusteringS2S": 41.67, + "MasakhaNEWSClusteringP2P": 68.35, + "MasakhaNEWSClusteringS2S": 48.58, + "MedrxivClusteringP2P": 32.6, + "MedrxivClusteringS2S": 30.8, + "RedditClustering": 61.42, + "RedditClusteringP2P": 64.13, + "StackExchangeClustering": 72.22, + "StackExchangeClusteringP2P": 38.49, + "ThuNewsClusteringP2P": 58.71, + "ThuNewsClusteringS2S": 49.86, + "TwentyNewsgroupsClustering": 52.56 } ] }, "PairClassification": { "max_ap": [ { - "Model": "voyage-multilingual-2", - "OpusparcusPC (fr)": 93.68, - "PawsXPairClassification (fr)": 63.64 + "Model": "text-embedding-ada-002", + "Cmnli": 76.03, + "Ocnli": 63.08, + "OpusparcusPC": 94.12, + "PawsXPairClassification": 60.16, + "SprintDuplicateQuestions": 92.17, + "TwitterSemEval2015": 75.28, + "TwitterURLCorpus": 87.22 }, - { - "Model": "voyage-multilingual-2", - "OpusparcusPC (fr)": 93.68, - "PawsXPairClassification (fr)": 63.71 + { + "Model": "text-embedding-ada-002", + "Cmnli": 76.04, + "Ocnli": 63.08, + "OpusparcusPC": 94.16, + "PawsXPairClassification": 60.19, + "SprintDuplicateQuestions": 92.17, + "TwitterSemEval2015": 75.28, + "TwitterURLCorpus": 87.22 } ] }, "Reranking": { "map": [ { - "Model": "voyage-multilingual-2", - "AlloprofReranking": 74.78, - "SyntecReranking": 90.4 + "Model": "text-embedding-ada-002", + "AskUbuntuDupQuestions": 62.05, + "CMedQAv1": 63.08, + "CMedQAv2": 64.02, + "MMarcoReranking": 23.39, + "MindSmallReranking": 31.45, + "SciDocsRR": 81.22, + "StackOverflowDupQuestions": 50.54, + "SyntecReranking": 89.87, + "T2Reranking": 66.65 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-multilingual-2", - "AlloprofRetrieval": 58.27, - "BSARDRetrieval": 5.14, - "LEMBNarrativeQARetrieval": 64.69, - "LEMBQMSumRetrieval": 51.49, - "LEMBSummScreenFDRetrieval": 99.11, - "LEMBWikimQARetrieval": 87.49, - "MintakaRetrieval (fr)": 49.19, - "SyntecRetrieval": 87.28, - "XPQARetrieval (fr)": 72.92 - }, - { - "Model": "voyage-multilingual-2", - "LEMBNeedleRetrieval": 75.25, - "LEMBPasskeyRetrieval": 97.0 + "Model": "text-embedding-ada-002", + "ARCChallenge": 13.3, + "AlloprofRetrieval": 51.64, + "AlphaNLI": 25.65, + "ArguAna": 57.44, + "BSARDRetrieval": 0.61, + "CQADupstackRetrieval": 41.69, + "ClimateFEVER": 21.64, + "CmedqaRetrieval": 22.36, + "CovidRetrieval": 57.21, + "DBPedia": 39.39, + "DuRetrieval": 71.17, + "EcomRetrieval": 44.49, + "FEVER": 74.99, + "FiQA2018": 44.41, + "HellaSwag": 29.29, + "HotpotQA": 60.9, + "MMarcoRetrieval": 69.86, + "MSMARCO": 40.91, + "MedicalRetrieval": 37.92, + "MintakaRetrieval": 29.94, + "NFCorpus": 36.97, + "NQ": 51.58, + "PIQA": 31.02, + "Quail": 5.83, + "QuoraRetrieval": 87.6, + "RARbCode": 83.39, + "RARbMath": 73.21, + "SCIDOCS": 18.36, + "SIQA": 3.14, + "SciFact": 72.75, + "SpartQA": 4.23, + "SyntecRetrieval": 85.97, + "T2Retrieval": 69.14, + "TRECCOVID": 68.47, + "TempReasonL1": 1.68, + "TempReasonL2Fact": 19.93, + "TempReasonL2Pure": 2.6, + "TempReasonL3Fact": 18.02, + "TempReasonL3Pure": 7.58, + "Touche2020": 21.61, + "VideoRetrieval": 43.85, + "WinoGrande": 19.65, + "XPQARetrieval": 73.0 } ] }, "STS": { "cosine_spearman": [ { - "Model": "voyage-multilingual-2", - "SICKFr": 74.9, - "STS22 (fr)": 82.76, - "STSBenchmarkMultilingualSTS (fr)": 82.72 + "Model": "text-embedding-ada-002", + "AFQMC": 23.88, + "ATEC": 29.25, + "BIOSSES": 86.35, + "BQ": 45.33, + "LCQMC": 68.41, + "PAWSX": 16.55, + "QBQTC": 30.27, + "SICK-R": 80.6, + "SICKFr": 76.28, + "STS12": 69.8, + "STS13": 83.27, + "STS14": 76.09, + "STS15": 86.12, + "STS16": 85.96, + "STS17": 90.25, + "STS22": 81.09, + "STSB": 70.61, + "STSBenchmark": 83.17, + "STSBenchmarkMultilingualSTS": 77.55 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "voyage-multilingual-2", - "SummEvalFr": 29.96 + "Model": "text-embedding-ada-002", + "SummEval": 30.8, + "SummEvalFr": 30.5 } ] }, @@ -20788,147 +20033,74 @@ "p-MRR": [] } }, - "USER-base": { + "text-embedding-ada-002-instruct": { "BitextMining": { - "f1": [ - { - "Model": "USER-base", - "Tatoeba (rus-Cyrl_eng-Latn)": 90.2 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "USER-base", - "GeoreviewClassification (rus-Cyrl)": 47.23, - "HeadlineClassification (rus-Cyrl)": 74.88, - "InappropriatenessClassification (rus-Cyrl)": 61.94, - "KinopoiskClassification (rus-Cyrl)": 55.69, - "MassiveIntentClassification (rus-Cyrl)": 65.57, - "MassiveScenarioClassification (rus-Cyrl)": 68.33, - "RuReviewsClassification (rus-Cyrl)": 66.44, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 55.55, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.28 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "USER-base", - "GeoreviewClusteringP2P (rus-Cyrl)": 64.16, - "MLSUMClusteringP2P (rus-Cyrl)": 48.09, - "MLSUMClusteringS2S (rus-Cyrl)": 45.73, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 51.38, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.73 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "USER-base", - "OpusparcusPC (rus-Cyrl)": 91.65, - "TERRa (rus-Cyrl)": 60.02 - }, - { - "Model": "USER-base", - "OpusparcusPC (rus-Cyrl)": 91.65, - "TERRa (rus-Cyrl)": 60.11 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "USER-base", - "MIRACLReranking (rus-Cyrl)": 46.75 - }, - { - "Model": "USER-base", - "RuBQReranking (rus-Cyrl)": 64.42 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "USER-base", - "MIRACLRetrieval (rus-Cyrl)": 35.22, - "RiaNewsRetrieval (rus-Cyrl)": 77.83, - "RuBQRetrieval (rus-Cyrl)": 56.86 + "Model": "text-embedding-ada-002-instruct", + "ARCChallenge": 11.85, + "AlphaNLI": 10.62, + "HellaSwag": 24.8, + "PIQA": 23.87, + "Quail": 5.79, + "RARbCode": 82.36, + "RARbMath": 67.26, + "SIQA": 2.64, + "SpartQA": 4.75, + "TempReasonL1": 1.44, + "TempReasonL2Fact": 19.38, + "TempReasonL2Pure": 2.43, + "TempReasonL3Fact": 17.58, + "TempReasonL3Pure": 7.31, + "WinoGrande": 11.36 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "USER-base", - "RUParaPhraserSTS (rus-Cyrl)": 73.56, - "RuSTSBenchmarkSTS (rus-Cyrl)": 82.26, - "STS22 (rus-Cyrl)": 63.39, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81 - }, - { - "Model": "USER-base", - "RUParaPhraserSTS (rus-Cyrl)": 73.56, - "RuSTSBenchmarkSTS (rus-Cyrl)": 82.26, - "STS22 (rus-Cyrl)": 63.39, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81 - } - ] + "cosine_spearman": [] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "USER-base", - "CEDRClassification (rus-Cyrl)": 46.47, - "SensitiveTopicsClassification (rus-Cyrl)": 27.5 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] } }, - "electra-small-swedish-cased-discriminator": { + "text-search-ada-001": { "BitextMining": { - "f1": [ - { - "Model": "electra-small-swedish-cased-discriminator", - "BornholmBitextMining": 0.85 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ + "accuracy": [] + }, + "Clustering": { + "v_measure": [ { - "Model": "electra-small-swedish-cased-discriminator", - "AngryTweetsClassification": 40.52, - "DKHateClassification": 52.28, - "DanishPoliticalCommentsClassification": 25.17, - "LccSentimentClassification": 36.67, - "MassiveIntentClassification (da)": 6.51, - "MassiveIntentClassification (nb)": 5.66, - "MassiveIntentClassification (sv)": 6.6, - "MassiveScenarioClassification (da)": 11.5, - "MassiveScenarioClassification (nb)": 11.26, - "MassiveScenarioClassification (sv)": 12.16, - "NoRecClassification": 39.72, - "NordicLangClassification": 44.53, - "NorwegianParliament": 52.44, - "ScalaDaClassification": 51.66, - "ScalaNbClassification": 52.41 + "Model": "text-search-ada-001", + "BiorxivClusteringS2S": 26.05, + "MedrxivClusteringS2S": 25.67, + "TwentyNewsgroupsClustering": 44.92 } ] }, - "Clustering": { - "v_measure": [] - }, "PairClassification": { "max_ap": [] }, @@ -20936,7 +20108,25 @@ "map": [] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "text-search-ada-001", + "ArguAna": 46.91, + "ClimateFEVER": 18.5, + "DBPedia": 36.2, + "FEVER": 72.1, + "FiQA2018": 38.41, + "HotpotQA": 59.39, + "MSMARCO": 37.94, + "NFCorpus": 33.17, + "NQ": 42.81, + "QuoraRetrieval": 70.57, + "SCIDOCS": 14.83, + "SciFact": 67.25, + "TRECCOVID": 72.43, + "Touche2020": 28.68 + } + ] }, "STS": { "cosine_spearman": [] @@ -20951,120 +20141,35 @@ "p-MRR": [] } }, - "gtr-t5-base": { + "text-search-ada-doc-001": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "gtr-t5-base", - "AmazonCounterfactualClassification (en)": 69.33, - "AmazonPolarityClassification": 67.82, - "AmazonReviewsClassification (en)": 38.48, - "Banking77Classification": 79.26, - "EmotionClassification": 42.2, - "ImdbClassification": 65.99, - "MTOPDomainClassification (en)": 92.42, - "MTOPIntentClassification (en)": 62.44, - "MassiveIntentClassification (en)": 67.05, - "MassiveScenarioClassification (en)": 75.4, - "ToxicConversationsClassification": 66.6, - "TweetSentimentExtractionClassification": 56.02 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [ { - "Model": "gtr-t5-base", - "ArxivClusteringP2P": 35.49, - "ArxivClusteringS2S": 27.18, - "BiorxivClusteringP2P": 27.66, - "BiorxivClusteringS2S": 23.25, - "MedrxivClusteringP2P": 27.57, - "MedrxivClusteringS2S": 25.13, - "RedditClustering": 56.13, - "RedditClusteringP2P": 58.53, - "StackExchangeClustering": 64.21, - "StackExchangeClusteringP2P": 33.01, - "TwentyNewsgroupsClustering": 46.72 - } - ] - }, - "PairClassification": { - "max_ap": [ - { - "Model": "gtr-t5-base", - "SprintDuplicateQuestions": 94.55, - "TwitterSemEval2015": 72.23, - "TwitterURLCorpus": 84.77 - }, - { - "Model": "gtr-t5-base", - "SprintDuplicateQuestions": 94.55, - "TwitterSemEval2015": 72.23, - "TwitterURLCorpus": 84.77 + "Model": "text-search-ada-doc-001", + "TwentyNewsgroupsClustering": 32.92 } ] }, + "PairClassification": { + "max_ap": [] + }, "Reranking": { - "map": [ - { - "Model": "gtr-t5-base", - "AskUbuntuDupQuestions": 60.86, - "MindSmallReranking": 31.33, - "SciDocsRR": 73.71, - "StackOverflowDupQuestions": 51.01 - } - ] + "map": [] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "gtr-t5-base", - "ArguAna": 50.83, - "CQADupstackRetrieval": 34.55, - "ClimateFEVER": 24.88, - "DBPedia": 35.24, - "FEVER": 68.93, - "FiQA2018": 35.15, - "HotpotQA": 54.93, - "MSMARCO": 41.16, - "NFCorpus": 30.22, - "NQ": 50.47, - "QuoraRetrieval": 87.98, - "SCIDOCS": 14.0, - "SciFact": 59.74, - "TRECCOVID": 56.05, - "Touche2020": 25.89 - } - ] + "ndcg_at_10": [] }, "STS": { - "cosine_spearman": [ - { - "Model": "gtr-t5-base", - "BIOSSES": 79.0, - "SICK-R": 71.45, - "STS12": 68.59, - "STS13": 79.09, - "STS14": 74.64, - "STS15": 84.85, - "STS16": 81.57, - "STS17 (en-en)": 85.8, - "STS22 (en)": 66.17, - "STSBenchmark": 79.58 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "gtr-t5-base", - "SummEval": 29.67 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -21073,243 +20178,137 @@ "p-MRR": [] } }, - "google-gecko.text-embedding-preview-0409": { + "text-search-babbage-001": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "AmazonCounterfactualClassification (en)": 75.34, - "AmazonPolarityClassification": 97.34, - "AmazonReviewsClassification (en)": 51.17, - "Banking77Classification": 88.62, - "EmotionClassification": 52.51, - "ImdbClassification": 95.65, - "MTOPDomainClassification (en)": 98.35, - "MTOPIntentClassification (en)": 83.43, - "MassiveIntentClassification (en)": 80.22, - "MassiveScenarioClassification (en)": 87.19, - "ToxicConversationsClassification": 89.67, - "TweetSentimentExtractionClassification": 74.52 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "ArxivClusteringP2P": 46.27, - "ArxivClusteringS2S": 38.36, - "BiorxivClusteringP2P": 37.87, - "BiorxivClusteringS2S": 35.67, - "MedrxivClusteringP2P": 33.11, - "MedrxivClusteringS2S": 31.54, - "RedditClustering": 65.81, - "RedditClusteringP2P": 66.62, - "StackExchangeClustering": 74.52, - "StackExchangeClusteringP2P": 37.63, - "TwentyNewsgroupsClustering": 54.87 - } - ] + "v_measure": [] }, "PairClassification": { - "max_ap": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "SprintDuplicateQuestions": 96.26, - "TwitterSemEval2015": 79.04, - "TwitterURLCorpus": 87.53 - }, - { - "Model": "google-gecko.text-embedding-preview-0409", - "SprintDuplicateQuestions": 96.26, - "TwitterSemEval2015": 79.04, - "TwitterURLCorpus": 87.53 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "AskUbuntuDupQuestions": 64.4, - "MindSmallReranking": 33.07, - "SciDocsRR": 83.59, - "StackOverflowDupQuestions": 54.56 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "google-gecko.text-embedding-preview-0409", - "ArguAna": 62.18, - "BrightRetrieval (earth_science)": 34.38, - "BrightRetrieval (leetcode)": 29.64, - "BrightRetrieval (theoremqa_questions)": 21.51, - "BrightRetrieval (aops)": 9.33, - "BrightRetrieval (sustainable_living)": 17.25, - "BrightRetrieval (pony)": 3.59, - "BrightRetrieval (theoremqa_theorems)": 14.31, - "BrightRetrieval (stackoverflow)": 17.93, - "BrightRetrieval (biology)": 22.98, - "BrightRetrieval (robotics)": 15.98, - "BrightRetrieval (economics)": 19.5, - "BrightRetrieval (psychology)": 27.86, - "CQADupstackRetrieval": 48.89, - "ClimateFEVER": 33.21, - "DBPedia": 47.12, - "FEVER": 86.96, - "FiQA2018": 59.24, - "HotpotQA": 71.33, - "MSMARCO": 32.58, - "NFCorpus": 40.33, - "NQ": 61.28, - "QuoraRetrieval": 88.18, - "SCIDOCS": 20.34, - "SciFact": 75.42, - "TRECCOVID": 82.62, - "Touche2020": 25.86 - } - ], - "recall_at_1": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "BrightRetrieval (economics)": 21.84, - "BrightRetrieval (stackoverflow)": 19.23, - "BrightRetrieval (pony)": 0.29, - "BrightRetrieval (earth_science)": 38.0, - "BrightRetrieval (sustainable_living)": 25.65, - "BrightRetrieval (robotics)": 12.87, - "BrightRetrieval (psychology)": 30.69, - "BrightRetrieval (biology)": 30.91 + "Model": "text-search-babbage-001", + "ArguAna": 49.2, + "ClimateFEVER": 19.9, + "FEVER": 77.0, + "FiQA2018": 42.2, + "HotpotQA": 63.1, + "NFCorpus": 36.7, + "QuoraRetrieval": 69.7, + "SciFact": 70.4, + "TRECCOVID": 58.5, + "Touche2020": 29.7 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "BIOSSES": 89.46, - "SICK-R": 81.93, - "STS12": 77.59, - "STS13": 90.36, - "STS14": 85.25, - "STS15": 89.66, - "STS16": 87.34, - "STS17 (en-en)": 92.06, - "STS22 (en)": 68.02, - "STSBenchmark": 88.99 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "SummEval": 32.63 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "Core17InstructionRetrieval": 5.44, - "News21InstructionRetrieval": 3.94, - "Robust04InstructionRetrieval": -2.4 - } - ] + "p-MRR": [] } }, - "bert-base-15lang-cased": { + "text-search-curie-001": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "bert-base-15lang-cased", - "AmazonReviewsClassification (fr)": 29.35, - "MTOPDomainClassification (fr)": 63.7, - "MTOPIntentClassification (fr)": 37.85, - "MasakhaNEWSClassification (fra)": 63.89, - "MassiveIntentClassification (fr)": 37.28, - "MassiveScenarioClassification (fr)": 44.47 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ + "v_measure": [] + }, + "PairClassification": { + "max_ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "bert-base-15lang-cased", - "AlloProfClusteringP2P": 53.16, - "AlloProfClusteringS2S": 43.43, - "HALClusteringS2S": 20.26, - "MLSUMClusteringP2P": 41.22, - "MLSUMClusteringS2S": 31.88, - "MasakhaNEWSClusteringP2P (fra)": 24.23, - "MasakhaNEWSClusteringS2S (fra)": 24.46 + "Model": "text-search-curie-001", + "ArguAna": 46.98, + "ClimateFEVER": 19.4, + "FEVER": 75.6, + "FiQA2018": 45.21, + "HotpotQA": 64.8, + "NFCorpus": 38.01, + "QuoraRetrieval": 67.7, + "SCIDOCS": 17.74, + "SciFact": 74.35, + "TRECCOVID": 56.14, + "Touche2020": 30.9 } ] }, + "STS": { + "cosine_spearman": [] + }, + "Summarization": { + "cosine_spearman": [] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] + } + }, + "text-search-davinci-001": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, "PairClassification": { - "max_ap": [ - { - "Model": "bert-base-15lang-cased", - "OpusparcusPC (fr)": 86.78, - "PawsXPairClassification (fr)": 53.38 - }, - { - "Model": "bert-base-15lang-cased", - "OpusparcusPC (fr)": 87.73, - "PawsXPairClassification (fr)": 53.38 - } - ] + "max_ap": [] }, "Reranking": { - "map": [ - { - "Model": "bert-base-15lang-cased", - "AlloprofReranking": 36.21, - "SyntecReranking": 53.25 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bert-base-15lang-cased", - "AlloprofRetrieval": 1.61, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 3.55, - "SyntecRetrieval": 18.95, - "XPQARetrieval (fr)": 18.35 + "Model": "text-search-davinci-001", + "ArguAna": 43.5, + "ClimateFEVER": 22.3, + "FEVER": 77.5, + "FiQA2018": 51.2, + "HotpotQA": 68.8, + "NFCorpus": 40.7, + "QuoraRetrieval": 63.8, + "SciFact": 75.4, + "TRECCOVID": 64.9, + "Touche2020": 29.1 } ] }, "STS": { - "cosine_spearman": [ - { - "Model": "bert-base-15lang-cased", - "SICKFr": 58.77, - "STS22 (fr)": 40.4, - "STSBenchmarkMultilingualSTS (fr)": 52.25 - } - ] + "cosine_spearman": [] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "bert-base-15lang-cased", - "SummEvalFr": 29.13 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -21318,87 +20317,118 @@ "p-MRR": [] } }, - "multi-qa-MiniLM-L6-cos-v1": { + "text-similarity-ada-001": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "AmazonReviewsClassification (fr)": 27.05, - "MTOPDomainClassification (fr)": 72.97, - "MTOPIntentClassification (fr)": 37.18, - "MasakhaNEWSClassification (fra)": 75.62, - "MassiveIntentClassification (fr)": 42.64, - "MassiveScenarioClassification (fr)": 49.92 + "Model": "text-similarity-ada-001", + "AmazonCounterfactualClassification": 76.4, + "AmazonPolarityClassification": 92.83, + "AmazonReviewsClassification": 47.45, + "Banking77Classification": 68.04, + "EmotionClassification": 50.33, + "ImdbClassification": 89.38, + "MTOPDomainClassification": 89.89, + "MTOPIntentClassification": 64.8, + "MassiveIntentClassification": 65.17, + "MassiveScenarioClassification": 67.67, + "ToxicConversationsClassification": 70.0, + "TweetSentimentExtractionClassification": 63.35 } ] }, "Clustering": { "v_measure": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "AlloProfClusteringP2P": 49.13, - "AlloProfClusteringS2S": 26.16, - "HALClusteringS2S": 12.49, - "MLSUMClusteringP2P": 35.15, - "MLSUMClusteringS2S": 25.95, - "MasakhaNEWSClusteringP2P (fra)": 53.73, - "MasakhaNEWSClusteringS2S (fra)": 27.27 + "Model": "text-similarity-ada-001", + "ArxivClusteringP2P": 41.49, + "ArxivClusteringS2S": 28.47, + "BiorxivClusteringP2P": 36.86, + "BiorxivClusteringS2S": 27.55, + "MedrxivClusteringP2P": 31.09, + "MedrxivClusteringS2S": 26.5, + "RedditClustering": 42.47, + "RedditClusteringP2P": 58.1, + "StackExchangeClustering": 53.52, + "StackExchangeClusteringP2P": 30.43, + "TwentyNewsgroupsClustering": 36.26 } ] }, "PairClassification": { "max_ap": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "OpusparcusPC (fr)": 88.07, - "PawsXPairClassification (fr)": 57.36 + "Model": "text-similarity-ada-001", + "SprintDuplicateQuestions": 77.85, + "TwitterSemEval2015": 69.04, + "TwitterURLCorpus": 83.69 }, { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "OpusparcusPC (fr)": 88.07, - "PawsXPairClassification (fr)": 57.48 + "Model": "text-similarity-ada-001", + "SprintDuplicateQuestions": 78.07, + "TwitterSemEval2015": 69.04, + "TwitterURLCorpus": 83.69 } ] }, "Reranking": { "map": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "AlloprofReranking": 40.28, - "SyntecReranking": 65.08 + "Model": "text-similarity-ada-001", + "AskUbuntuDupQuestions": 53.49, + "MindSmallReranking": 30.71, + "SciDocsRR": 71.04, + "StackOverflowDupQuestions": 40.85 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "AlloprofRetrieval": 30.23, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 16.31, - "SyntecRetrieval": 58.07, - "XPQARetrieval (fr)": 48.83 + "Model": "text-similarity-ada-001", + "ArguAna": 39.65, + "CQADupstackRetrieval": 10.17, + "ClimateFEVER": 2.83, + "DBPedia": 3.48, + "FEVER": 4.45, + "FiQA2018": 7.54, + "HotpotQA": 12.6, + "MSMARCO": 10.53, + "NFCorpus": 20.59, + "NQ": 2.02, + "QuoraRetrieval": 82.18, + "SCIDOCS": 6.28, + "SciFact": 45.46, + "TRECCOVID": 24.56, + "Touche2020": 3.1 } ] }, "STS": { "cosine_spearman": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "SICKFr": 62.11, - "STS22 (fr)": 74.62, - "STSBenchmarkMultilingualSTS (fr)": 63.85 + "Model": "text-similarity-ada-001", + "BIOSSES": 78.04, + "SICK-R": 77.48, + "STS12": 72.3, + "STS13": 81.49, + "STS14": 74.74, + "STS15": 84.28, + "STS16": 82.06, + "STS17": 87.08, + "STS22": 64.71, + "STSBenchmark": 83.78 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "multi-qa-MiniLM-L6-cos-v1", - "SummEvalFr": 27.59 + "Model": "text-similarity-ada-001", + "SummEval": 26.94 } ] }, @@ -21409,85 +20439,53 @@ "p-MRR": [] } }, - "USER-bge-m3": { + "text-similarity-babbage-001": { "BitextMining": { - "f1": [ - { - "Model": "USER-bge-m3", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.52 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "USER-bge-m3", - "GeoreviewClassification (rus-Cyrl)": 50.98, - "HeadlineClassification (rus-Cyrl)": 70.09, - "InappropriatenessClassification (rus-Cyrl)": 60.76, - "KinopoiskClassification (rus-Cyrl)": 63.33, - "MassiveIntentClassification (rus-Cyrl)": 68.85, - "MassiveScenarioClassification (rus-Cyrl)": 72.9, - "RuReviewsClassification (rus-Cyrl)": 68.52, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 57.67, - "RuSciBenchOECDClassification (rus-Cyrl)": 44.2 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [ { - "Model": "USER-bge-m3", - "GeoreviewClusteringP2P (rus-Cyrl)": 62.79, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 53.11, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.93 + "Model": "text-similarity-babbage-001", + "RedditClustering": 45.64, + "StackExchangeClustering": 53.01, + "TwentyNewsgroupsClustering": 42.01 } ] }, "PairClassification": { "max_ap": [ { - "Model": "USER-bge-m3", - "OpusparcusPC (rus-Cyrl)": 90.73, - "TERRa (rus-Cyrl)": 64.99 - }, - { - "Model": "USER-bge-m3", - "OpusparcusPC (rus-Cyrl)": 90.73, - "TERRa (rus-Cyrl)": 65.07 + "Model": "text-similarity-babbage-001", + "SprintDuplicateQuestions": 76.46, + "TwitterSemEval2015": 70.85, + "TwitterURLCorpus": 85.08 } ] }, "Reranking": { "map": [ { - "Model": "USER-bge-m3", - "MIRACLReranking (rus-Cyrl)": 64.35 - }, - { - "Model": "USER-bge-m3", - "RuBQReranking (rus-Cyrl)": 73.08 + "Model": "text-similarity-babbage-001", + "AskUbuntuDupQuestions": 54.68, + "SciDocsRR": 72.78, + "StackOverflowDupQuestions": 40.65 } ] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "USER-bge-m3", - "MIRACLRetrieval (rus-Cyrl)": 67.33, - "RiaNewsRetrieval (rus-Cyrl)": 83.53, - "RuBQRetrieval (rus-Cyrl)": 70.03 - } - ] + "ndcg_at_10": [] }, "STS": { "cosine_spearman": [ { - "Model": "USER-bge-m3", - "RUParaPhraserSTS (rus-Cyrl)": 76.36, - "RuSTSBenchmarkSTS (rus-Cyrl)": 83.35, - "STS22 (rus-Cyrl)": 66.42, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 82.96 + "Model": "text-similarity-babbage-001", + "BIOSSES": 78.12, + "SICK-R": 77.02, + "STSBenchmark": 84.32 } ] }, @@ -21495,261 +20493,133 @@ "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "USER-bge-m3", - "CEDRClassification (rus-Cyrl)": 45.48, - "SensitiveTopicsClassification (rus-Cyrl)": 26.29 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] } }, - "contriever-base-msmarco": { + "text-similarity-curie-001": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "contriever-base-msmarco", - "AmazonCounterfactualClassification (en)": 72.19, - "AmazonPolarityClassification": 68.63, - "AmazonReviewsClassification (en)": 37.42, - "Banking77Classification": 80.02, - "EmotionClassification": 44.77, - "ImdbClassification": 67.04, - "MTOPDomainClassification (en)": 93.18, - "MTOPIntentClassification (en)": 69.31, - "MassiveIntentClassification (en)": 67.78, - "MassiveScenarioClassification (en)": 76.0, - "ToxicConversationsClassification": 67.77, - "TweetSentimentExtractionClassification": 56.1 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [ { - "Model": "contriever-base-msmarco", - "ArxivClusteringP2P": 42.61, - "ArxivClusteringS2S": 32.32, - "BiorxivClusteringP2P": 34.97, - "BiorxivClusteringS2S": 29.08, - "MedrxivClusteringP2P": 31.19, - "MedrxivClusteringS2S": 27.27, - "RedditClustering": 54.89, - "RedditClusteringP2P": 57.58, - "StackExchangeClustering": 63.15, - "StackExchangeClusteringP2P": 32.25, - "TwentyNewsgroupsClustering": 46.82 + "Model": "text-similarity-curie-001", + "RedditClustering": 40.79, + "StackExchangeClustering": 55.14, + "TwentyNewsgroupsClustering": 37.64 } ] }, "PairClassification": { "max_ap": [ { - "Model": "contriever-base-msmarco", - "SprintDuplicateQuestions": 95.55, - "TwitterSemEval2015": 66.85, - "TwitterURLCorpus": 85.21 - }, - { - "Model": "contriever-base-msmarco", - "SprintDuplicateQuestions": 95.55, - "TwitterSemEval2015": 66.85, - "TwitterURLCorpus": 85.21 + "Model": "text-similarity-curie-001", + "SprintDuplicateQuestions": 79.85, + "TwitterSemEval2015": 69.45, + "TwitterURLCorpus": 84.06 } ] }, "Reranking": { "map": [ { - "Model": "contriever-base-msmarco", - "AskUbuntuDupQuestions": 56.69, - "MindSmallReranking": 31.58, - "SciDocsRR": 76.51, - "StackOverflowDupQuestions": 47.78 + "Model": "text-similarity-curie-001", + "AskUbuntuDupQuestions": 55.09, + "SciDocsRR": 70.93, + "StackOverflowDupQuestions": 42.42 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "contriever-base-msmarco", - "ArguAna": 48.32, - "CQADupstackRetrieval": 33.67, - "ClimateFEVER": 24.79, - "DBPedia": 38.1, - "FEVER": 59.29, - "FiQA2018": 27.42, - "HotpotQA": 56.81, - "MSMARCO": 36.77, - "NFCorpus": 31.32, - "NQ": 41.83, - "QuoraRetrieval": 86.72, - "SCIDOCS": 17.12, - "SciFact": 65.51, - "TRECCOVID": 44.77, - "Touche2020": 15.79 + "Model": "text-similarity-curie-001", + "FiQA2018": 5.14, + "NFCorpus": 19.96, + "QuoraRetrieval": 83.11, + "SciFact": 46.68, + "TRECCOVID": 7.61 } ] }, "STS": { "cosine_spearman": [ { - "Model": "contriever-base-msmarco", - "BIOSSES": 83.32, - "SICK-R": 70.2, - "STS12": 64.34, - "STS13": 80.03, - "STS14": 74.51, - "STS15": 83.3, - "STS16": 79.67, - "STS17 (en-en)": 86.32, - "STS22 (en)": 64.64, - "STSBenchmark": 78.81 + "Model": "text-similarity-curie-001", + "BIOSSES": 77.46, + "SICK-R": 77.26, + "STSBenchmark": 83.02 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "contriever-base-msmarco", - "SummEval": 30.36 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "contriever-base-msmarco", - "Core17InstructionRetrieval": -2.48, - "News21InstructionRetrieval": -2.83, - "Robust04InstructionRetrieval": -6.12 - } - ] + "p-MRR": [] } }, - "LLM2Vec-Meta-Llama-3-unsupervised": { + "text-similarity-davinci-001": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "AmazonCounterfactualClassification (en)": 75.7, - "AmazonPolarityClassification": 80.68, - "AmazonReviewsClassification (en)": 40.0, - "Banking77Classification": 84.77, - "EmotionClassification": 47.08, - "ImdbClassification": 75.19, - "MTOPDomainClassification (en)": 94.47, - "MTOPIntentClassification (en)": 81.09, - "MassiveIntentClassification (en)": 75.01, - "MassiveScenarioClassification (en)": 79.16, - "ToxicConversationsClassification": 71.85, - "TweetSentimentExtractionClassification": 57.61 - } - ] + "accuracy": [] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "ArxivClusteringP2P": 49.22, - "ArxivClusteringS2S": 41.71, - "BiorxivClusteringP2P": 38.39, - "BiorxivClusteringS2S": 31.31, - "MedrxivClusteringP2P": 31.47, - "MedrxivClusteringS2S": 27.87, - "RedditClustering": 43.67, - "RedditClusteringP2P": 61.67, - "StackExchangeClustering": 68.2, - "StackExchangeClusteringP2P": 36.36, - "TwentyNewsgroupsClustering": 32.01 + "Model": "text-similarity-davinci-001", + "RedditClustering": 31.78, + "StackExchangeClustering": 36.86, + "TwentyNewsgroupsClustering": 29.33 } ] }, "PairClassification": { "max_ap": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "SprintDuplicateQuestions": 88.14, - "TwitterSemEval2015": 66.6, - "TwitterURLCorpus": 79.3 - }, - { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "SprintDuplicateQuestions": 88.14, - "TwitterSemEval2015": 66.6, - "TwitterURLCorpus": 79.3 + "Model": "text-similarity-davinci-001", + "SprintDuplicateQuestions": 69.52, + "TwitterSemEval2015": 74.42, + "TwitterURLCorpus": 83.75 } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "AskUbuntuDupQuestions": 57.16, - "MindSmallReranking": 30.1, - "SciDocsRR": 76.28, - "StackOverflowDupQuestions": 48.82 + "Model": "text-similarity-davinci-001", + "AskUbuntuDupQuestions": 53.56, + "SciDocsRR": 68.7, + "StackOverflowDupQuestions": 39.41 } ] }, "Retrieval": { - "ndcg_at_10": [ - { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "ArguAna": 51.73, - "CQADupstackRetrieval": 32.4, - "ClimateFEVER": 23.58, - "DBPedia": 26.78, - "FEVER": 53.42, - "FiQA2018": 28.56, - "HotpotQA": 52.37, - "MSMARCO": 17.47, - "NFCorpus": 26.28, - "NQ": 37.65, - "QuoraRetrieval": 84.64, - "SCIDOCS": 10.39, - "SciFact": 66.36, - "TRECCOVID": 63.34, - "Touche2020": 12.82 - } - ] + "ndcg_at_10": [] }, "STS": { "cosine_spearman": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "BIOSSES": 84.67, - "SICK-R": 72.16, - "STS12": 61.6, - "STS13": 79.71, - "STS14": 72.11, - "STS15": 82.18, - "STS16": 79.41, - "STS17 (en-en)": 85.44, - "STS22 (en)": 63.9, - "STSBenchmark": 77.44 + "Model": "text-similarity-davinci-001", + "BIOSSES": 68.95, + "SICK-R": 78.72, + "STSBenchmark": 84.08 } ] }, "Summarization": { - "cosine_spearman": [ - { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "SummEval": 31.45 - } - ] + "cosine_spearman": [] }, "MultilabelClassification": { "accuracy": [] @@ -21758,89 +20628,89 @@ "p-MRR": [] } }, - "bge-base-zh-v1.5": { + "text2vec-base-chinese": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "bge-base-zh-v1.5", - "AmazonReviewsClassification (zh)": 40.15, - "IFlyTek": 48.62, - "JDReview": 83.62, - "MassiveIntentClassification (zh-CN)": 67.93, - "MassiveScenarioClassification (zh-CN)": 73.98, - "MultilingualSentiment": 70.67, - "OnlineShopping": 91.26, - "TNews": 51.08, - "Waimai": 85.36 + "Model": "text2vec-base-chinese", + "AmazonReviewsClassification": 34.12, + "IFlyTek": 42.05, + "JDReview": 82.14, + "MassiveIntentClassification": 63.98, + "MassiveScenarioClassification": 70.52, + "MultilingualSentiment": 60.98, + "OnlineShopping": 85.69, + "TNews": 43.01, + "Waimai": 77.22 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-base-zh-v1.5", - "CLSClusteringP2P": 39.91, - "CLSClusteringS2S": 37.63, - "ThuNewsClusteringP2P": 58.45, - "ThuNewsClusteringS2S": 54.12 + "Model": "text2vec-base-chinese", + "CLSClusteringP2P": 35.27, + "CLSClusteringS2S": 32.42, + "ThuNewsClusteringP2P": 42.92, + "ThuNewsClusteringS2S": 40.01 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bge-base-zh-v1.5", - "Cmnli": 84.1, - "Ocnli": 75.41 + "Model": "text2vec-base-chinese", + "Cmnli": 73.87, + "Ocnli": 60.95 }, { - "Model": "bge-base-zh-v1.5", - "Cmnli": 84.11, - "Ocnli": 75.43 + "Model": "text2vec-base-chinese", + "Cmnli": 73.9, + "Ocnli": 61.26 } ] }, "Reranking": { "map": [ { - "Model": "bge-base-zh-v1.5", - "CMedQAv1": 80.47, - "CMedQAv2": 84.88, - "MMarcoReranking": 29.74, - "T2Reranking": 66.49 + "Model": "text2vec-base-chinese", + "CMedQAv1": 59.26, + "CMedQAv2": 59.82, + "MMarcoReranking": 12.76, + "T2Reranking": 65.95 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-base-zh-v1.5", - "CmedqaRetrieval": 41.61, - "CovidRetrieval": 74.7, - "DuRetrieval": 85.07, - "EcomRetrieval": 64.25, - "MMarcoRetrieval": 77.69, - "MedicalRetrieval": 56.51, - "T2Retrieval": 83.71, - "VideoRetrieval": 72.35 + "Model": "text2vec-base-chinese", + "CmedqaRetrieval": 15.91, + "CovidRetrieval": 44.81, + "DuRetrieval": 52.23, + "EcomRetrieval": 34.6, + "MMarcoRetrieval": 44.06, + "MedicalRetrieval": 27.56, + "T2Retrieval": 51.67, + "VideoRetrieval": 39.52 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bge-base-zh-v1.5", - "AFQMC": 42.4, - "ATEC": 48.17, - "BQ": 61.78, - "LCQMC": 74.45, - "PAWSX": 20.4, - "QBQTC": 36.22, - "STS22 (zh)": 68.01, - "STSB": 78.31 + "Model": "text2vec-base-chinese", + "AFQMC": 26.06, + "ATEC": 31.93, + "BQ": 42.67, + "LCQMC": 70.16, + "PAWSX": 17.21, + "QBQTC": 24.62, + "STS22": 55.35, + "STSB": 79.3 } ] }, @@ -21854,87 +20724,87 @@ "p-MRR": [] } }, - "bert-base-10lang-cased": { + "text2vec-base-multilingual": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "bert-base-10lang-cased", - "AmazonReviewsClassification (fr)": 29.38, - "MTOPDomainClassification (fr)": 63.65, - "MTOPIntentClassification (fr)": 37.87, - "MasakhaNEWSClassification (fra)": 63.93, - "MassiveIntentClassification (fr)": 37.28, - "MassiveScenarioClassification (fr)": 44.5 + "Model": "text2vec-base-multilingual", + "AmazonReviewsClassification": 34.25, + "MTOPDomainClassification": 71.83, + "MTOPIntentClassification": 44.53, + "MasakhaNEWSClassification": 73.84, + "MassiveIntentClassification": 51.93, + "MassiveScenarioClassification": 58.31 } ] }, "Clustering": { "v_measure": [ { - "Model": "bert-base-10lang-cased", - "AlloProfClusteringP2P": 53.22, - "AlloProfClusteringS2S": 42.92, - "HALClusteringS2S": 19.94, - "MLSUMClusteringP2P": 40.96, - "MLSUMClusteringS2S": 31.87, - "MasakhaNEWSClusteringP2P (fra)": 24.23, - "MasakhaNEWSClusteringS2S (fra)": 24.46 + "Model": "text2vec-base-multilingual", + "AlloProfClusteringP2P": 49.11, + "AlloProfClusteringS2S": 32.72, + "HALClusteringS2S": 16.19, + "MLSUMClusteringP2P": 36.19, + "MLSUMClusteringS2S": 30.39, + "MasakhaNEWSClusteringP2P": 38.51, + "MasakhaNEWSClusteringS2S": 32.51 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bert-base-10lang-cased", - "OpusparcusPC (fr)": 86.79, - "PawsXPairClassification (fr)": 53.4 + "Model": "text2vec-base-multilingual", + "OpusparcusPC": 92.04, + "PawsXPairClassification": 65.57 }, { - "Model": "bert-base-10lang-cased", - "OpusparcusPC (fr)": 87.78, - "PawsXPairClassification (fr)": 53.4 + "Model": "text2vec-base-multilingual", + "OpusparcusPC": 92.04, + "PawsXPairClassification": 65.6 } ] }, "Reranking": { "map": [ { - "Model": "bert-base-10lang-cased", - "AlloprofReranking": 36.21, - "SyntecReranking": 53.25 + "Model": "text2vec-base-multilingual", + "AlloprofReranking": 51.48, + "SyntecReranking": 70.28 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bert-base-10lang-cased", - "AlloprofRetrieval": 1.6, + "Model": "text2vec-base-multilingual", + "AlloprofRetrieval": 18.9, "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 3.55, - "SyntecRetrieval": 18.95, - "XPQARetrieval (fr)": 18.39 + "MintakaRetrieval": 14.81, + "SyntecRetrieval": 49.69, + "XPQARetrieval": 40.4 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bert-base-10lang-cased", - "SICKFr": 58.76, - "STS22 (fr)": 40.31, - "STSBenchmarkMultilingualSTS (fr)": 52.25 + "Model": "text2vec-base-multilingual", + "SICKFr": 77.25, + "STS22": 74.1, + "STSBenchmarkMultilingualSTS": 83.48 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "bert-base-10lang-cased", - "SummEvalFr": 29.06 + "Model": "text2vec-base-multilingual", + "SummEvalFr": 29.33 } ] }, @@ -21945,87 +20815,89 @@ "p-MRR": [] } }, - "rubert-base-cased-sentence": { + "text2vec-large-chinese": { "BitextMining": { - "f1": [ - { - "Model": "rubert-base-cased-sentence", - "Tatoeba (rus-Cyrl_eng-Latn)": 20.26 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "rubert-base-cased-sentence", - "GeoreviewClassification (rus-Cyrl)": 38.05, - "HeadlineClassification (rus-Cyrl)": 67.64, - "InappropriatenessClassification (rus-Cyrl)": 58.27, - "KinopoiskClassification (rus-Cyrl)": 45.86, - "MassiveIntentClassification (rus-Cyrl)": 49.1, - "MassiveScenarioClassification (rus-Cyrl)": 51.91, - "RuReviewsClassification (rus-Cyrl)": 58.34, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.18, - "RuSciBenchOECDClassification (rus-Cyrl)": 40.11 + "Model": "text2vec-large-chinese", + "AmazonReviewsClassification": 33.77, + "IFlyTek": 41.54, + "JDReview": 81.56, + "MassiveIntentClassification": 63.23, + "MassiveScenarioClassification": 68.45, + "MultilingualSentiment": 58.97, + "OnlineShopping": 83.51, + "TNews": 38.92, + "Waimai": 76.01 } ] }, "Clustering": { "v_measure": [ { - "Model": "rubert-base-cased-sentence", - "GeoreviewClusteringP2P (rus-Cyrl)": 41.82, - "MLSUMClusteringP2P (rus-Cyrl)": 43.71, - "MLSUMClusteringS2S (rus-Cyrl)": 45.94, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 46.29, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.28 + "Model": "text2vec-large-chinese", + "CLSClusteringP2P": 30.13, + "CLSClusteringS2S": 28.77, + "ThuNewsClusteringP2P": 35.05, + "ThuNewsClusteringS2S": 26.14 } ] }, "PairClassification": { "max_ap": [ { - "Model": "rubert-base-cased-sentence", - "OpusparcusPC (rus-Cyrl)": 81.52, - "TERRa (rus-Cyrl)": 59.12 + "Model": "text2vec-large-chinese", + "Cmnli": 77.67, + "Ocnli": 64.04 }, { - "Model": "rubert-base-cased-sentence", - "OpusparcusPC (rus-Cyrl)": 81.76, - "TERRa (rus-Cyrl)": 59.12 + "Model": "text2vec-large-chinese", + "Cmnli": 77.85, + "Ocnli": 64.47 } ] }, "Reranking": { "map": [ { - "Model": "rubert-base-cased-sentence", - "MIRACLReranking (rus-Cyrl)": 13.77 - }, - { - "Model": "rubert-base-cased-sentence", - "RuBQReranking (rus-Cyrl)": 39.89 + "Model": "text2vec-large-chinese", + "CMedQAv1": 58.92, + "CMedQAv2": 60.41, + "MMarcoReranking": 12.48, + "T2Reranking": 64.82 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "rubert-base-cased-sentence", - "MIRACLRetrieval (rus-Cyrl)": 1.92, - "RiaNewsRetrieval (rus-Cyrl)": 6.72, - "RuBQRetrieval (rus-Cyrl)": 12.63 + "Model": "text2vec-large-chinese", + "CmedqaRetrieval": 15.53, + "CovidRetrieval": 60.48, + "DuRetrieval": 51.87, + "EcomRetrieval": 37.58, + "MMarcoRetrieval": 45.96, + "MedicalRetrieval": 30.93, + "T2Retrieval": 50.52, + "VideoRetrieval": 42.65 } ] }, "STS": { "cosine_spearman": [ { - "Model": "rubert-base-cased-sentence", - "RUParaPhraserSTS (rus-Cyrl)": 66.24, - "RuSTSBenchmarkSTS (rus-Cyrl)": 66.03, - "STS22 (rus-Cyrl)": 51.27, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 66.71 + "Model": "text2vec-large-chinese", + "AFQMC": 24.51, + "ATEC": 32.45, + "BQ": 44.22, + "LCQMC": 69.16, + "PAWSX": 14.55, + "QBQTC": 29.51, + "STS22": 65.94, + "STSB": 79.45 } ] }, @@ -22033,24 +20905,24 @@ "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "rubert-base-cased-sentence", - "CEDRClassification (rus-Cyrl)": 35.55, - "SensitiveTopicsClassification (rus-Cyrl)": 20.05 - } - ] + "accuracy": [] }, "InstructionRetrieval": { "p-MRR": [] } }, - "text-embedding-3-small-instruct": { + "titan-embed-text-v1": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "titan-embed-text-v1", + "AmazonCounterfactualClassification": 61.85, + "Banking77Classification": 83.21 + } + ] }, "Clustering": { "v_measure": [] @@ -22059,32 +20931,41 @@ "max_ap": [] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "titan-embed-text-v1", + "SciDocsRR": 88.87 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-3-small-instruct", - "ARCChallenge": 13.76, - "AlphaNLI": 21.14, - "HellaSwag": 27.2, - "PIQA": 29.59, - "Quail": 6.64, - "RARbCode": 72.14, - "RARbMath": 64.31, - "SIQA": 2.98, - "SpartQA": 3.58, - "TempReasonL1": 2.29, - "TempReasonL2Fact": 26.34, - "TempReasonL2Pure": 3.17, - "TempReasonL3Fact": 22.72, - "TempReasonL3Pure": 9.98, - "WinoGrande": 25.49 + "Model": "titan-embed-text-v1", + "ArguAna": 48.83, + "FiQA2018": 40.38, + "MSMARCO": 35.19, + "NQ": 51.08, + "SciFact": 73.5, + "TRECCOVID": 54.74 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "titan-embed-text-v1", + "BIOSSES": 84.17, + "SICK-R": 73.05, + "STS12": 66.59, + "STS13": 83.24, + "STS14": 73.71, + "STS15": 82.4, + "STS16": NaN, + "STS17": 80.9, + "STSBenchmark": 74.85 + } + ] }, "Summarization": { "cosine_spearman": [] @@ -22096,269 +20977,87 @@ "p-MRR": [] } }, - "komninos": { + "udever-bloom-1b1": { "BitextMining": { - "f1": [ - { - "Model": "komninos", - "BUCC (de-en)": 0.18, - "BUCC (fr-en)": 0.08, - "BUCC (ru-en)": 0.15, - "BUCC (zh-en)": 0.05, - "Tatoeba (afr-eng)": 4.82, - "Tatoeba (amh-eng)": 1.18, - "Tatoeba (ang-eng)": 8.54, - "Tatoeba (ara-eng)": 0.63, - "Tatoeba (arq-eng)": 0.4, - "Tatoeba (arz-eng)": 0.63, - "Tatoeba (ast-eng)": 11.69, - "Tatoeba (awa-eng)": 0.0, - "Tatoeba (aze-eng)": 3.22, - "Tatoeba (bel-eng)": 1.75, - "Tatoeba (ben-eng)": 0.2, - "Tatoeba (ber-eng)": 7.0, - "Tatoeba (bos-eng)": 9.31, - "Tatoeba (bre-eng)": 4.17, - "Tatoeba (bul-eng)": 1.29, - "Tatoeba (cat-eng)": 7.73, - "Tatoeba (cbk-eng)": 5.61, - "Tatoeba (ceb-eng)": 4.88, - "Tatoeba (ces-eng)": 3.55, - "Tatoeba (cha-eng)": 19.29, - "Tatoeba (cmn-eng)": 0.5, - "Tatoeba (cor-eng)": 4.15, - "Tatoeba (csb-eng)": 5.69, - "Tatoeba (cym-eng)": 8.4, - "Tatoeba (dan-eng)": 6.99, - "Tatoeba (deu-eng)": 3.67, - "Tatoeba (dsb-eng)": 5.33, - "Tatoeba (dtp-eng)": 4.25, - "Tatoeba (ell-eng)": 0.63, - "Tatoeba (epo-eng)": 2.45, - "Tatoeba (est-eng)": 2.69, - "Tatoeba (eus-eng)": 4.69, - "Tatoeba (fao-eng)": 7.61, - "Tatoeba (fin-eng)": 3.36, - "Tatoeba (fra-eng)": 7.0, - "Tatoeba (fry-eng)": 12.36, - "Tatoeba (gla-eng)": 3.07, - "Tatoeba (gle-eng)": 4.81, - "Tatoeba (glg-eng)": 8.12, - "Tatoeba (gsw-eng)": 18.87, - "Tatoeba (heb-eng)": 0.68, - "Tatoeba (hin-eng)": 0.1, - "Tatoeba (hrv-eng)": 5.41, - "Tatoeba (hsb-eng)": 6.32, - "Tatoeba (hun-eng)": 3.42, - "Tatoeba (hye-eng)": 0.97, - "Tatoeba (ido-eng)": 7.1, - "Tatoeba (ile-eng)": 13.61, - "Tatoeba (ina-eng)": 8.57, - "Tatoeba (ind-eng)": 7.26, - "Tatoeba (isl-eng)": 4.09, - "Tatoeba (ita-eng)": 5.54, - "Tatoeba (jav-eng)": 11.43, - "Tatoeba (jpn-eng)": 0.2, - "Tatoeba (kab-eng)": 2.71, - "Tatoeba (kat-eng)": 1.11, - "Tatoeba (kaz-eng)": 1.17, - "Tatoeba (khm-eng)": 0.55, - "Tatoeba (kor-eng)": 0.5, - "Tatoeba (kur-eng)": 8.55, - "Tatoeba (kzj-eng)": 4.61, - "Tatoeba (lat-eng)": 4.07, - "Tatoeba (lfn-eng)": 2.83, - "Tatoeba (lit-eng)": 0.95, - "Tatoeba (lvs-eng)": 3.25, - "Tatoeba (mal-eng)": 0.29, - "Tatoeba (mar-eng)": 0.2, - "Tatoeba (max-eng)": 14.53, - "Tatoeba (mhr-eng)": 0.2, - "Tatoeba (mkd-eng)": 0.2, - "Tatoeba (mon-eng)": 1.1, - "Tatoeba (nds-eng)": 10.37, - "Tatoeba (nld-eng)": 9.5, - "Tatoeba (nno-eng)": 4.49, - "Tatoeba (nob-eng)": 4.95, - "Tatoeba (nov-eng)": 14.53, - "Tatoeba (oci-eng)": 5.8, - "Tatoeba (orv-eng)": 0.24, - "Tatoeba (pam-eng)": 6.65, - "Tatoeba (pes-eng)": 0.5, - "Tatoeba (pms-eng)": 8.05, - "Tatoeba (pol-eng)": 5.13, - "Tatoeba (por-eng)": 5.87, - "Tatoeba (ron-eng)": 6.76, - "Tatoeba (rus-eng)": 0.2, - "Tatoeba (slk-eng)": 4.23, - "Tatoeba (slv-eng)": 6.05, - "Tatoeba (spa-eng)": 5.03, - "Tatoeba (sqi-eng)": 4.36, - "Tatoeba (srp-eng)": 1.77, - "Tatoeba (swe-eng)": 6.72, - "Tatoeba (swg-eng)": 8.54, - "Tatoeba (swh-eng)": 11.49, - "Tatoeba (tam-eng)": 1.3, - "Tatoeba (tat-eng)": 0.77, - "Tatoeba (tel-eng)": 0.85, - "Tatoeba (tgl-eng)": 2.61, - "Tatoeba (tha-eng)": 0.69, - "Tatoeba (tuk-eng)": 5.76, - "Tatoeba (tur-eng)": 5.24, - "Tatoeba (tzl-eng)": 15.51, - "Tatoeba (uig-eng)": 0.6, - "Tatoeba (ukr-eng)": 1.23, - "Tatoeba (urd-eng)": 0.4, - "Tatoeba (uzb-eng)": 4.73, - "Tatoeba (vie-eng)": 6.55, - "Tatoeba (war-eng)": 4.12, - "Tatoeba (wuu-eng)": 0.2, - "Tatoeba (xho-eng)": 4.33, - "Tatoeba (yid-eng)": 0.59, - "Tatoeba (yue-eng)": 0.5, - "Tatoeba (zsm-eng)": 7.27 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "komninos", - "AmazonCounterfactualClassification (en)": 60.54, - "AmazonPolarityClassification": 59.59, - "AmazonReviewsClassification (en)": 31.01, - "Banking77Classification": 67.05, - "EmotionClassification": 33.18, - "ImdbClassification": 63.98, - "MTOPDomainClassification (en)": 78.57, - "MTOPIntentClassification (en)": 57.07, - "MassiveIntentClassification (en)": 57.21, - "MassiveScenarioClassification (en)": 66.11, - "ToxicConversationsClassification": 67.76, - "TweetSentimentExtractionClassification": 49.68 + "Model": "udever-bloom-1b1", + "AmazonReviewsClassification": 35.12, + "MTOPDomainClassification": 69.24, + "MTOPIntentClassification": 51.25, + "MasakhaNEWSClassification": 80.83, + "MassiveIntentClassification": 43.21, + "MassiveScenarioClassification": 49.78 } ] }, "Clustering": { "v_measure": [ { - "Model": "komninos", - "ArxivClusteringP2P": 34.73, - "ArxivClusteringS2S": 26.01, - "BiorxivClusteringP2P": 29.76, - "BiorxivClusteringS2S": 20.71, - "BlurbsClusteringP2P": 11.37, - "BlurbsClusteringS2S": 8.01, - "MedrxivClusteringP2P": 26.65, - "MedrxivClusteringS2S": 21.5, - "RedditClustering": 28.84, - "RedditClusteringP2P": 7.37, - "StackExchangeClustering": 39.04, - "StackExchangeClusteringP2P": 30.23, - "TenKGnadClusteringP2P": 15.89, - "TenKGnadClusteringS2S": 4.84, - "TwentyNewsgroupsClustering": 27.42 + "Model": "udever-bloom-1b1", + "AlloProfClusteringP2P": 62.22, + "AlloProfClusteringS2S": 27.06, + "HALClusteringS2S": 13.86, + "MLSUMClusteringP2P": 44.11, + "MLSUMClusteringS2S": 30.47, + "MasakhaNEWSClusteringP2P": 40.2, + "MasakhaNEWSClusteringS2S": 27.35 } ] }, "PairClassification": { "max_ap": [ { - "Model": "komninos", - "SprintDuplicateQuestions": 85.55, - "TwitterSemEval2015": 53.85, - "TwitterURLCorpus": 79.41 + "Model": "udever-bloom-1b1", + "OpusparcusPC": 85.54, + "PawsXPairClassification": 61.99 }, { - "Model": "komninos", - "SprintDuplicateQuestions": 85.55, - "TwitterSemEval2015": 54.02, - "TwitterURLCorpus": 79.41 + "Model": "udever-bloom-1b1", + "OpusparcusPC": 90.15, + "PawsXPairClassification": 63.95 } ] }, "Reranking": { "map": [ { - "Model": "komninos", - "AskUbuntuDupQuestions": 50.88, - "MindSmallReranking": 28.92, - "SciDocsRR": 63.55, - "StackOverflowDupQuestions": 35.65 + "Model": "udever-bloom-1b1", + "AlloprofReranking": 39.13, + "SyntecReranking": 62.58 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "komninos", - "ArguAna": 30.96, - "CQADupstackRetrieval": 16.79, - "ClimateFEVER": 14.87, - "DBPedia": 15.88, - "FEVER": 15.56, - "FiQA2018": 10.49, - "HotpotQA": 20.77, - "MSMARCO": 9.75, - "NFCorpus": 11.79, - "NQ": 12.75, - "QuoraRetrieval": 71.57, - "SCIDOCS": 8.47, - "SciFact": 29.53, - "TRECCOVID": 35.92, - "Touche2020": 13.17 + "Model": "udever-bloom-1b1", + "AlloprofRetrieval": 12.37, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 2.78, + "SyntecRetrieval": 40.57, + "XPQARetrieval": 33.82 } ] }, "STS": { "cosine_spearman": [ { - "Model": "komninos", - "BIOSSES": 50.25, - "SICK-R": 55.49, - "STS12": 53.51, - "STS13": 70.8, - "STS14": 63.56, - "STS15": 74.08, - "STS16": 64.6, - "STS17 (ar-ar)": 13.78, - "STS17 (en-ar)": 9.08, - "STS17 (en-de)": -3.11, - "STS17 (en-en)": 76.91, - "STS17 (en-tr)": -0.45, - "STS17 (es-en)": -8.18, - "STS17 (es-es)": 48.23, - "STS17 (fr-en)": 5.81, - "STS17 (it-en)": 3.64, - "STS17 (ko-ko)": 2.54, - "STS17 (nl-en)": 0.44, - "STS22 (ar)": 32.42, - "STS22 (de)": 33.04, - "STS22 (de-en)": 28.65, - "STS22 (de-fr)": 14.77, - "STS22 (de-pl)": 11.21, - "STS22 (en)": 53.89, - "STS22 (es)": 48.53, - "STS22 (es-en)": 26.97, - "STS22 (es-it)": 41.1, - "STS22 (fr)": 49.43, - "STS22 (fr-pl)": 39.44, - "STS22 (it)": 57.77, - "STS22 (pl)": 12.47, - "STS22 (pl-en)": 45.55, - "STS22 (ru)": 19.44, - "STS22 (tr)": 47.38, - "STS22 (zh)": 4.78, - "STS22 (zh-en)": 14.05, - "STSBenchmark": 61.55 + "Model": "udever-bloom-1b1", + "SICKFr": 59.94, + "STS22": 77.1, + "STSBenchmarkMultilingualSTS": 49.97 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "komninos", - "SummEval": 30.49 + "Model": "udever-bloom-1b1", + "SummEvalFr": 29.48 } ] }, @@ -22369,95 +21068,89 @@ "p-MRR": [] } }, - "text-embedding-3-large-instruct": { + "udever-bloom-560m": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] - }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [ + "accuracy": [ { - "Model": "text-embedding-3-large-instruct", - "ARCChallenge": 21.22, - "AlphaNLI": 34.23, - "HellaSwag": 31.4, - "PIQA": 37.52, - "Quail": 13.6, - "RARbCode": 89.41, - "RARbMath": 87.73, - "SIQA": 4.99, - "SpartQA": 7.45, - "TempReasonL1": 2.07, - "TempReasonL2Fact": 39.77, - "TempReasonL2Pure": 11.04, - "TempReasonL3Fact": 37.04, - "TempReasonL3Pure": 15.51, - "WinoGrande": 33.92 + "Model": "udever-bloom-560m", + "AmazonReviewsClassification": 26.85, + "MTOPDomainClassification": 34.99, + "MTOPIntentClassification": 15.76, + "MasakhaNEWSClassification": 67.94, + "MassiveIntentClassification": 15.09, + "MassiveScenarioClassification": 21.67 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [] - } - }, - "text-search-davinci-001": { - "BitextMining": { - "f1": [] - }, - "Classification": { - "accuracy": [] - }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "udever-bloom-560m", + "AlloProfClusteringP2P": 53.57, + "AlloProfClusteringS2S": 22.13, + "HALClusteringS2S": 7.68, + "MLSUMClusteringP2P": 36.43, + "MLSUMClusteringS2S": 25.26, + "MasakhaNEWSClusteringP2P": 37.57, + "MasakhaNEWSClusteringS2S": 20.58 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "udever-bloom-560m", + "OpusparcusPC": 82.1, + "PawsXPairClassification": 59.69 + }, + { + "Model": "udever-bloom-560m", + "OpusparcusPC": 85.87, + "PawsXPairClassification": 61.99 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "udever-bloom-560m", + "AlloprofReranking": 28.75, + "SyntecReranking": 50.88 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-search-davinci-001", - "ArguAna": 43.5, - "ClimateFEVER": 22.3, - "FEVER": 77.5, - "FiQA2018": 51.2, - "HotpotQA": 68.8, - "NFCorpus": 40.7, - "QuoraRetrieval": 63.8, - "SciFact": 75.4, - "TRECCOVID": 64.9, - "Touche2020": 29.1 + "Model": "udever-bloom-560m", + "AlloprofRetrieval": 1.98, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 0.48, + "SyntecRetrieval": 24.45, + "XPQARetrieval": 12.98 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "udever-bloom-560m", + "SICKFr": 54.54, + "STS22": 61.35, + "STSBenchmarkMultilingualSTS": 36.78 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "udever-bloom-560m", + "SummEvalFr": 23.63 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -22466,94 +21159,89 @@ "p-MRR": [] } }, - "bge-small-zh-v1.5": { + "universal-sentence-encoder-multilingual-3": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "bge-small-zh-v1.5", - "AmazonReviewsClassification (zh)": 35.91, - "IFlyTek": 45.49, - "JDReview": 80.04, - "MassiveIntentClassification (zh-CN)": 63.95, - "MassiveScenarioClassification (zh-CN)": 70.8, - "MultilingualSentiment": 63.06, - "OnlineShopping": 85.05, - "TNews": 48.15, - "Waimai": 83.18 + "Model": "universal-sentence-encoder-multilingual-3", + "AmazonReviewsClassification": 33.51, + "MTOPDomainClassification": 85.5, + "MTOPIntentClassification": 53.98, + "MasakhaNEWSClassification": 82.06, + "MassiveIntentClassification": 61.19, + "MassiveScenarioClassification": 70.22 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-small-zh-v1.5", - "CLSClusteringP2P": 38.14, - "CLSClusteringS2S": 35.14, - "ThuNewsClusteringP2P": 54.22, - "ThuNewsClusteringS2S": 49.22 + "Model": "universal-sentence-encoder-multilingual-3", + "AlloProfClusteringP2P": 56.9, + "AlloProfClusteringS2S": 37.84, + "HALClusteringS2S": 18.95, + "MLSUMClusteringP2P": 43.9, + "MLSUMClusteringS2S": 35.5, + "MasakhaNEWSClusteringP2P": 60.57, + "MasakhaNEWSClusteringS2S": 40.31 } ] }, "PairClassification": { "max_ap": [ { - "Model": "bge-small-zh-v1.5", - "Cmnli": 76.24, - "Ocnli": 64.57 + "Model": "universal-sentence-encoder-multilingual-3", + "OpusparcusPC": 91.46, + "PawsXPairClassification": 52.39 }, { - "Model": "bge-small-zh-v1.5", - "Cmnli": 76.24, - "Ocnli": 64.57 + "Model": "universal-sentence-encoder-multilingual-3", + "OpusparcusPC": 91.46, + "PawsXPairClassification": 52.41 } ] }, "Reranking": { "map": [ { - "Model": "bge-small-zh-v1.5", - "CMedQAv1": 77.4, - "CMedQAv2": 79.86, - "MMarcoReranking": 20.5, - "T2Reranking": 65.9 + "Model": "universal-sentence-encoder-multilingual-3", + "AlloprofReranking": 56.23, + "SyntecReranking": 73.85 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-small-zh-v1.5", - "CmedqaRetrieval": 35.11, - "CovidRetrieval": 70.14, - "DuRetrieval": 77.28, - "EcomRetrieval": 55.71, - "MMarcoRetrieval": 63.48, - "MedicalRetrieval": 49.8, - "T2Retrieval": 76.43, - "VideoRetrieval": 66.19 + "Model": "universal-sentence-encoder-multilingual-3", + "AlloprofRetrieval": 35.27, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 26.12, + "SyntecRetrieval": 69.82, + "XPQARetrieval": 59.59 } ] }, "STS": { "cosine_spearman": [ { - "Model": "bge-small-zh-v1.5", - "AFQMC": 33.42, - "ATEC": 43.01, - "BQ": 55.22, - "LCQMC": 72.19, - "PAWSX": 9.26, - "QBQTC": 35.29, - "STS22 (zh)": 67.72, - "STSB": 76.73 + "Model": "universal-sentence-encoder-multilingual-3", + "SICKFr": 71.37, + "STS22": 77.91, + "STSBenchmarkMultilingualSTS": 75.48 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "universal-sentence-encoder-multilingual-3", + "SummEvalFr": 28.21 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -22562,186 +21250,209 @@ "p-MRR": [] } }, - "instructor-xl": { + "universal-sentence-encoder-multilingual-large-3": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "AmazonReviewsClassification": 35.09, + "MTOPDomainClassification": 88.19, + "MTOPIntentClassification": 63.64, + "MasakhaNEWSClassification": 72.04, + "MassiveIntentClassification": 65.8, + "MassiveScenarioClassification": 73.47 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "AlloProfClusteringP2P": 54.21, + "AlloProfClusteringS2S": 37.95, + "HALClusteringS2S": 18.94, + "MLSUMClusteringP2P": 41.02, + "MLSUMClusteringS2S": 37.97, + "MasakhaNEWSClusteringP2P": 24.09, + "MasakhaNEWSClusteringS2S": 40.24 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "OpusparcusPC": 93.38, + "PawsXPairClassification": 53.62 + }, + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "OpusparcusPC": 93.38, + "PawsXPairClassification": 53.66 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "AlloprofReranking": 55.39, + "SyntecReranking": 77.13 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "instructor-xl", - "BrightRetrieval (aops)": 8.26, - "BrightRetrieval (robotics)": 17.39, - "BrightRetrieval (economics)": 22.81, - "BrightRetrieval (stackoverflow)": 19.06, - "BrightRetrieval (leetcode)": 27.5, - "BrightRetrieval (theoremqa_questions)": 14.59, - "BrightRetrieval (psychology)": 27.43, - "BrightRetrieval (biology)": 21.91, - "BrightRetrieval (theoremqa_theorems)": 6.22, - "BrightRetrieval (earth_science)": 34.35, - "BrightRetrieval (sustainable_living)": 18.82, - "BrightRetrieval (pony)": 5.02 - } - ], - "recall_at_1": [ - { - "Model": "instructor-xl", - "BrightRetrieval (stackoverflow)": 14.96, - "BrightRetrieval (biology)": 22.01, - "BrightRetrieval (sustainable_living)": 20.14, - "BrightRetrieval (pony)": 5.93, - "BrightRetrieval (psychology)": 20.5, - "BrightRetrieval (robotics)": 12.87, - "BrightRetrieval (economics)": 14.08, - "BrightRetrieval (earth_science)": 32.04 + "Model": "universal-sentence-encoder-multilingual-large-3", + "AlloprofRetrieval": 33.78, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 26.21, + "SyntecRetrieval": 63.69, + "XPQARetrieval": 65.21 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "SICKFr": 74.39, + "STS22": 71.11, + "STSBenchmarkMultilingualSTS": 78.16 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "universal-sentence-encoder-multilingual-large-3", + "SummEvalFr": 28.56 + } + ] }, "MultilabelClassification": { "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "instructor-xl", - "Core17InstructionRetrieval": 0.69, - "News21InstructionRetrieval": -0.9, - "Robust04InstructionRetrieval": -8.08 - } - ] + "p-MRR": [] } }, - "elser-v2": { + "unsup-simcse-bert-base-uncased": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "elser-v2", - "AmazonCounterfactualClassification (en)": 74.16, - "AmazonPolarityClassification": 61.91, - "AmazonReviewsClassification (en)": 32.06, - "Banking77Classification": 82.05, - "EmotionClassification": 46.65, - "ImdbClassification": 65.02, - "MTOPDomainClassification (en)": 93.17, - "MTOPIntentClassification (en)": 71.1, - "MassiveIntentClassification (en)": 68.48, - "MassiveScenarioClassification (en)": 74.98, - "ToxicConversationsClassification": 68.15, - "TweetSentimentExtractionClassification": 53.57 + "Model": "unsup-simcse-bert-base-uncased", + "AmazonCounterfactualClassification": 67.09, + "AmazonPolarityClassification": 74.48, + "AmazonReviewsClassification": 33.85, + "Banking77Classification": 73.55, + "EmotionClassification": 42.22, + "ImdbClassification": 69.63, + "MTOPDomainClassification": 81.71, + "MTOPIntentClassification": 59.23, + "MassiveIntentClassification": 59.84, + "MassiveScenarioClassification": 66.25, + "ToxicConversationsClassification": 68.82, + "TweetSentimentExtractionClassification": 53.36 } ] }, "Clustering": { "v_measure": [ { - "Model": "elser-v2", - "ArxivClusteringP2P": 35.27, - "ArxivClusteringS2S": 23.18, - "BiorxivClusteringP2P": 31.13, - "BiorxivClusteringS2S": 26.78, - "MedrxivClusteringP2P": 24.65, - "MedrxivClusteringS2S": 24.21, - "RedditClustering": 38.74, - "RedditClusteringP2P": 51.92, - "StackExchangeClustering": 42.7, - "StackExchangeClusteringP2P": 28.7, - "TwentyNewsgroupsClustering": 27.82 + "Model": "unsup-simcse-bert-base-uncased", + "ArxivClusteringP2P": 32.61, + "ArxivClusteringS2S": 24.68, + "BiorxivClusteringP2P": 24.9, + "BiorxivClusteringS2S": 19.55, + "MedrxivClusteringP2P": 23.6, + "MedrxivClusteringS2S": 21.97, + "RedditClustering": 32.18, + "RedditClusteringP2P": 45.14, + "StackExchangeClustering": 43.07, + "StackExchangeClusteringP2P": 28.5, + "TwentyNewsgroupsClustering": 23.21 } ] }, - "PairClassification": { - "max_ap": [ - { - "Model": "elser-v2", - "SprintDuplicateQuestions": 94.53, - "TwitterSemEval2015": 64.41, - "TwitterURLCorpus": 85.01 + "PairClassification": { + "max_ap": [ + { + "Model": "unsup-simcse-bert-base-uncased", + "SprintDuplicateQuestions": 69.41, + "TwitterSemEval2015": 60.21, + "TwitterURLCorpus": 81.37 }, { - "Model": "elser-v2", - "SprintDuplicateQuestions": 94.53, - "TwitterSemEval2015": 64.41, - "TwitterURLCorpus": 85.01 + "Model": "unsup-simcse-bert-base-uncased", + "SprintDuplicateQuestions": 78.03, + "TwitterSemEval2015": 61.01, + "TwitterURLCorpus": 81.37 } ] }, "Reranking": { "map": [ { - "Model": "elser-v2", - "AskUbuntuDupQuestions": 58.31, - "MindSmallReranking": 30.75, - "SciDocsRR": 75.62, - "StackOverflowDupQuestions": 48.4 + "Model": "unsup-simcse-bert-base-uncased", + "AskUbuntuDupQuestions": 51.57, + "MindSmallReranking": 28.62, + "SciDocsRR": 66.33, + "StackOverflowDupQuestions": 39.35 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "elser-v2", - "ArguAna": 55.98, - "CQADupstackRetrieval": 34.27, - "ClimateFEVER": 27.08, - "DBPedia": 42.7, - "FEVER": 78.55, - "FiQA2018": 41.57, - "HotpotQA": 67.01, - "MSMARCO": 38.9, - "NFCorpus": 36.66, - "NQ": 55.84, - "QuoraRetrieval": 84.69, - "SCIDOCS": 16.24, - "SciFact": 71.8, - "TRECCOVID": 72.72, - "Touche2020": 26.27 + "Model": "unsup-simcse-bert-base-uncased", + "ArguAna": 38.34, + "CQADupstackRetrieval": 13.22, + "ClimateFEVER": 11.8, + "DBPedia": 15.04, + "FEVER": 21.06, + "FiQA2018": 9.84, + "HotpotQA": 19.75, + "MSMARCO": 9.35, + "NFCorpus": 9.88, + "NQ": 11.69, + "QuoraRetrieval": 78.03, + "SCIDOCS": 5.5, + "SciFact": 25.72, + "TRECCOVID": 26.2, + "Touche2020": 8.9 } ] }, "STS": { "cosine_spearman": [ { - "Model": "elser-v2", - "BIOSSES": 83.79, - "SICK-R": 68.78, - "STS12": 64.81, - "STS13": 80.1, - "STS14": 74.96, - "STS15": 83.7, - "STS16": 80.55, - "STS17 (en-en)": 85.74, - "STS22 (en)": 67.5, - "STSBenchmark": 79.54 + "Model": "unsup-simcse-bert-base-uncased", + "BIOSSES": 72.31, + "SICK-R": 72.24, + "STS12": 66.05, + "STS13": 81.49, + "STS14": 73.61, + "STS15": 79.72, + "STS16": 78.12, + "STS17": 83.58, + "STS22": 59.65, + "STSBenchmark": 76.52 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "elser-v2", - "SummEval": 31.03 + "Model": "unsup-simcse-bert-base-uncased", + "SummEval": 31.15 } ] }, @@ -22752,7 +21463,7 @@ "p-MRR": [] } }, - "monot5-base-msmarco-10k": { + "use-cmlm-multilingual": { "BitextMining": { "f1": [] }, @@ -22760,71 +21471,16 @@ "accuracy": [] }, "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "monot5-base-msmarco-10k", - "Core17InstructionRetrieval": -4.06, - "News21InstructionRetrieval": 5.02, - "Robust04InstructionRetrieval": -6.2 - } - ] - } - }, - "bert-base-swedish-cased": { - "BitextMining": { - "f1": [ - { - "Model": "bert-base-swedish-cased", - "BornholmBitextMining": 6.6 - } - ] - }, - "Classification": { - "accuracy": [ + "v_measure": [ { - "Model": "bert-base-swedish-cased", - "AngryTweetsClassification": 44.58, - "DKHateClassification": 55.53, - "DanishPoliticalCommentsClassification": 28.97, - "LccSentimentClassification": 41.2, - "MassiveIntentClassification (da)": 37.98, - "MassiveIntentClassification (nb)": 35.75, - "MassiveIntentClassification (sv)": 52.75, - "MassiveScenarioClassification (da)": 40.44, - "MassiveScenarioClassification (nb)": 35.76, - "MassiveScenarioClassification (sv)": 56.09, - "NoRecClassification": 43.91, - "NordicLangClassification": 62.45, - "NorwegianParliament": 57.56, - "ScalaDaClassification": 53.53, - "ScalaNbClassification": 53.63 + "Model": "use-cmlm-multilingual", + "BlurbsClusteringP2P": 29.63, + "BlurbsClusteringS2S": 15.24, + "TenKGnadClusteringP2P": 37.1, + "TenKGnadClusteringS2S": 25.64 } ] }, - "Clustering": { - "v_measure": [] - }, "PairClassification": { "max_ap": [] }, @@ -22847,49 +21503,89 @@ "p-MRR": [] } }, - "all-mpnet-base-v2-instruct": { + "voyage-2": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-2", + "AmazonReviewsClassification": 37.26, + "MTOPDomainClassification": 79.79, + "MTOPIntentClassification": 45.62, + "MasakhaNEWSClassification": 80.19, + "MassiveIntentClassification": 53.7, + "MassiveScenarioClassification": 62.46 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "voyage-2", + "AlloProfClusteringP2P": 57.96, + "AlloProfClusteringS2S": 41.65, + "HALClusteringS2S": 24.84, + "MLSUMClusteringP2P": 45.08, + "MLSUMClusteringS2S": 38.77, + "MasakhaNEWSClusteringP2P": 48.54, + "MasakhaNEWSClusteringS2S": 36.33 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "voyage-2", + "OpusparcusPC": 89.76, + "PawsXPairClassification": 58.96 + }, + { + "Model": "voyage-2", + "OpusparcusPC": 89.83, + "PawsXPairClassification": 58.97 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "voyage-2", + "AlloprofReranking": 63.54, + "SyntecReranking": 82.65 + } + ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "all-mpnet-base-v2-instruct", - "ARCChallenge": 10.35, - "AlphaNLI": 1.96, - "HellaSwag": 13.01, - "PIQA": 27.18, - "Quail": 3.02, - "RARbCode": 48.95, - "RARbMath": 69.21, - "SIQA": 1.29, - "SpartQA": 1.01, - "TempReasonL1": 1.52, - "TempReasonL2Fact": 7.28, - "TempReasonL2Pure": 1.03, - "TempReasonL3Fact": 7.03, - "TempReasonL3Pure": 5.16, - "WinoGrande": 9.66 + "Model": "voyage-2", + "AlloprofRetrieval": 45.5, + "BSARDRetrieval": 0.15, + "MintakaRetrieval": 15.51, + "SyntecRetrieval": 75.83, + "XPQARetrieval": 67.07 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-2", + "SICKFr": 68.51, + "STS22": 70.51, + "STSBenchmarkMultilingualSTS": 76.43 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-2", + "SummEvalFr": 30.88 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -22898,263 +21594,87 @@ "p-MRR": [] } }, - "gtr-t5-large": { + "voyage-code-2": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "gtr-t5-large", - "AmazonCounterfactualClassification (de)": 59.38, - "AmazonCounterfactualClassification (en)": 70.03, - "AmazonCounterfactualClassification (en-ext)": 69.86, - "AmazonCounterfactualClassification (ja)": 45.87, - "AmazonPolarityClassification": 73.92, - "AmazonReviewsClassification (de)": 33.06, - "AmazonReviewsClassification (en)": 37.21, - "AmazonReviewsClassification (es)": 34.0, - "AmazonReviewsClassification (fr)": 33.48, - "AmazonReviewsClassification (ja)": 21.78, - "AmazonReviewsClassification (zh)": 21.83, - "Banking77Classification": 81.21, - "EmotionClassification": 46.33, - "ImdbClassification": 70.86, - "MTOPDomainClassification (de)": 81.91, - "MTOPDomainClassification (en)": 94.01, - "MTOPDomainClassification (es)": 84.7, - "MTOPDomainClassification (fr)": 82.48, - "MTOPDomainClassification (hi)": 22.11, - "MTOPDomainClassification (th)": 16.36, - "MTOPIntentClassification (de)": 52.13, - "MTOPIntentClassification (en)": 63.86, - "MTOPIntentClassification (es)": 52.62, - "MTOPIntentClassification (fr)": 46.39, - "MTOPIntentClassification (hi)": 3.9, - "MTOPIntentClassification (th)": 5.38, - "MassiveIntentClassification (af)": 41.02, - "MassiveIntentClassification (am)": 2.34, - "MassiveIntentClassification (ar)": 4.87, - "MassiveIntentClassification (az)": 34.92, - "MassiveIntentClassification (bn)": 2.52, - "MassiveIntentClassification (cy)": 35.87, - "MassiveIntentClassification (da)": 45.3, - "MassiveIntentClassification (de)": 51.48, - "MassiveIntentClassification (el)": 10.0, - "MassiveIntentClassification (en)": 70.06, - "MassiveIntentClassification (es)": 53.3, - "MassiveIntentClassification (fa)": 3.59, - "MassiveIntentClassification (fi)": 37.35, - "MassiveIntentClassification (fr)": 54.83, - "MassiveIntentClassification (he)": 2.52, - "MassiveIntentClassification (hi)": 2.88, - "MassiveIntentClassification (hu)": 33.52, - "MassiveIntentClassification (hy)": 3.13, - "MassiveIntentClassification (id)": 40.11, - "MassiveIntentClassification (is)": 34.77, - "MassiveIntentClassification (it)": 51.21, - "MassiveIntentClassification (ja)": 4.75, - "MassiveIntentClassification (jv)": 35.6, - "MassiveIntentClassification (ka)": 2.71, - "MassiveIntentClassification (km)": 5.48, - "MassiveIntentClassification (kn)": 2.44, - "MassiveIntentClassification (ko)": 2.59, - "MassiveIntentClassification (lv)": 38.15, - "MassiveIntentClassification (ml)": 2.67, - "MassiveIntentClassification (mn)": 18.47, - "MassiveIntentClassification (ms)": 35.58, - "MassiveIntentClassification (my)": 4.35, - "MassiveIntentClassification (nb)": 43.78, - "MassiveIntentClassification (nl)": 45.96, - "MassiveIntentClassification (pl)": 39.08, - "MassiveIntentClassification (pt)": 52.27, - "MassiveIntentClassification (ro)": 46.39, - "MassiveIntentClassification (ru)": 16.82, - "MassiveIntentClassification (sl)": 37.3, - "MassiveIntentClassification (sq)": 41.73, - "MassiveIntentClassification (sv)": 43.51, - "MassiveIntentClassification (sw)": 35.97, - "MassiveIntentClassification (ta)": 1.52, - "MassiveIntentClassification (te)": 2.57, - "MassiveIntentClassification (th)": 3.94, - "MassiveIntentClassification (tl)": 41.03, - "MassiveIntentClassification (tr)": 33.75, - "MassiveIntentClassification (ur)": 2.57, - "MassiveIntentClassification (vi)": 25.23, - "MassiveIntentClassification (zh-CN)": 2.41, - "MassiveIntentClassification (zh-TW)": 4.64, - "MassiveScenarioClassification (af)": 51.48, - "MassiveScenarioClassification (am)": 7.74, - "MassiveScenarioClassification (ar)": 12.03, - "MassiveScenarioClassification (az)": 41.77, - "MassiveScenarioClassification (bn)": 8.07, - "MassiveScenarioClassification (cy)": 43.67, - "MassiveScenarioClassification (da)": 54.88, - "MassiveScenarioClassification (de)": 63.63, - "MassiveScenarioClassification (el)": 16.83, - "MassiveScenarioClassification (en)": 75.49, - "MassiveScenarioClassification (es)": 61.48, - "MassiveScenarioClassification (fa)": 6.48, - "MassiveScenarioClassification (fi)": 43.54, - "MassiveScenarioClassification (fr)": 64.06, - "MassiveScenarioClassification (he)": 8.03, - "MassiveScenarioClassification (hi)": 7.5, - "MassiveScenarioClassification (hu)": 42.59, - "MassiveScenarioClassification (hy)": 9.22, - "MassiveScenarioClassification (id)": 48.67, - "MassiveScenarioClassification (is)": 43.87, - "MassiveScenarioClassification (it)": 59.83, - "MassiveScenarioClassification (ja)": 5.62, - "MassiveScenarioClassification (jv)": 42.18, - "MassiveScenarioClassification (ka)": 7.52, - "MassiveScenarioClassification (km)": 9.55, - "MassiveScenarioClassification (kn)": 8.34, - "MassiveScenarioClassification (ko)": 6.11, - "MassiveScenarioClassification (lv)": 43.35, - "MassiveScenarioClassification (ml)": 7.28, - "MassiveScenarioClassification (mn)": 23.94, - "MassiveScenarioClassification (ms)": 45.18, - "MassiveScenarioClassification (my)": 9.33, - "MassiveScenarioClassification (nb)": 52.71, - "MassiveScenarioClassification (nl)": 57.02, - "MassiveScenarioClassification (pl)": 46.79, - "MassiveScenarioClassification (pt)": 59.45, - "MassiveScenarioClassification (ro)": 56.8, - "MassiveScenarioClassification (ru)": 25.85, - "MassiveScenarioClassification (sl)": 42.51, - "MassiveScenarioClassification (sq)": 50.41, - "MassiveScenarioClassification (sv)": 54.16, - "MassiveScenarioClassification (sw)": 43.02, - "MassiveScenarioClassification (ta)": 7.21, - "MassiveScenarioClassification (te)": 6.9, - "MassiveScenarioClassification (th)": 8.7, - "MassiveScenarioClassification (tl)": 51.76, - "MassiveScenarioClassification (tr)": 42.54, - "MassiveScenarioClassification (ur)": 9.32, - "MassiveScenarioClassification (vi)": 31.51, - "MassiveScenarioClassification (zh-CN)": 3.84, - "MassiveScenarioClassification (zh-TW)": 8.16, - "ToxicConversationsClassification": 68.65, - "TweetSentimentExtractionClassification": 54.09 + "Model": "voyage-code-2", + "AmazonReviewsClassification": 42.15, + "MTOPDomainClassification": 87.68, + "MTOPIntentClassification": 59.44, + "MasakhaNEWSClassification": 82.13, + "MassiveIntentClassification": 63.08, + "MassiveScenarioClassification": 70.15 } ] }, "Clustering": { "v_measure": [ { - "Model": "gtr-t5-large", - "ArxivClusteringP2P": 37.5, - "ArxivClusteringS2S": 30.55, - "BiorxivClusteringP2P": 29.59, - "BiorxivClusteringS2S": 25.72, - "MedrxivClusteringP2P": 28.72, - "MedrxivClusteringS2S": 27.39, - "RedditClustering": 61.69, - "RedditClusteringP2P": 61.67, - "StackExchangeClustering": 69.93, - "StackExchangeClusteringP2P": 33.21, - "TwentyNewsgroupsClustering": 51.64 + "Model": "voyage-code-2", + "AlloProfClusteringP2P": 61.63, + "AlloProfClusteringS2S": 50.67, + "HALClusteringS2S": 27.44, + "MLSUMClusteringP2P": 45.23, + "MLSUMClusteringS2S": 41.48, + "MasakhaNEWSClusteringP2P": 56.59, + "MasakhaNEWSClusteringS2S": 35.18 } ] }, "PairClassification": { "max_ap": [ { - "Model": "gtr-t5-large", - "SprintDuplicateQuestions": 95.05, - "TwitterSemEval2015": 76.03, - "TwitterURLCorpus": 84.89 + "Model": "voyage-code-2", + "OpusparcusPC": 92.87, + "PawsXPairClassification": 60.83 }, { - "Model": "gtr-t5-large", - "SprintDuplicateQuestions": 95.05, - "TwitterSemEval2015": 76.03, - "TwitterURLCorpus": 84.89 + "Model": "voyage-code-2", + "OpusparcusPC": 92.87, + "PawsXPairClassification": 60.88 } ] }, "Reranking": { "map": [ { - "Model": "gtr-t5-large", - "AskUbuntuDupQuestions": 61.64, - "MindSmallReranking": 31.84, - "SciDocsRR": 76.39, - "StackOverflowDupQuestions": 51.58 + "Model": "voyage-code-2", + "AlloprofReranking": 70.79, + "SyntecReranking": 86.77 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "gtr-t5-large", - "ArguAna": 52.09, - "CQADupstackRetrieval": 36.62, - "ClimateFEVER": 26.9, - "DBPedia": 39.55, - "FEVER": 72.66, - "FiQA2018": 42.79, - "HotpotQA": 57.85, - "MSMARCO": 42.73, - "NFCorpus": 32.63, - "NQ": 55.09, - "QuoraRetrieval": 88.47, - "SCIDOCS": 15.51, - "SciFact": 63.42, - "TRECCOVID": 56.68, - "Touche2020": 28.29 + "Model": "voyage-code-2", + "AlloprofRetrieval": 52.61, + "BSARDRetrieval": 0.29, + "MintakaRetrieval": 19.05, + "SyntecRetrieval": 82.77, + "XPQARetrieval": 71.95 } ] }, "STS": { "cosine_spearman": [ { - "Model": "gtr-t5-large", - "BIOSSES": 84.86, - "SICK-R": 73.39, - "STS12": 70.33, - "STS13": 82.19, - "STS14": 77.16, - "STS15": 86.31, - "STS16": 81.85, - "STS17 (ar-ar)": 10.19, - "STS17 (en-ar)": -5.77, - "STS17 (en-de)": 67.43, - "STS17 (en-en)": 83.93, - "STS17 (en-tr)": 8.75, - "STS17 (es-en)": 54.96, - "STS17 (es-es)": 82.74, - "STS17 (fr-en)": 60.5, - "STS17 (it-en)": 46.26, - "STS17 (ko-ko)": 8.96, - "STS17 (nl-en)": 47.48, - "STS22 (ar)": 34.97, - "STS22 (de)": 51.7, - "STS22 (de-en)": 48.76, - "STS22 (de-fr)": 57.5, - "STS22 (de-pl)": 32.76, - "STS22 (en)": 64.3, - "STS22 (es)": 57.49, - "STS22 (es-en)": 67.76, - "STS22 (es-it)": 57.18, - "STS22 (fr)": 78.7, - "STS22 (fr-pl)": 61.98, - "STS22 (it)": 67.67, - "STS22 (pl)": 30.68, - "STS22 (pl-en)": 54.17, - "STS22 (ru)": 15.36, - "STS22 (tr)": 58.12, - "STS22 (zh)": 27.32, - "STS22 (zh-en)": 29.42, - "STSBenchmark": 77.6 + "Model": "voyage-code-2", + "SICKFr": 73.56, + "STS22": 79.99, + "STSBenchmarkMultilingualSTS": 79.02 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "gtr-t5-large", - "SummEval": 29.5 + "Model": "voyage-code-2", + "SummEvalFr": 28.34 } ] }, @@ -23165,375 +21685,368 @@ "p-MRR": [] } }, - "rubert-tiny-turbo": { + "voyage-large-2-instruct": { "BitextMining": { - "f1": [ - { - "Model": "rubert-tiny-turbo", - "Tatoeba (rus-Cyrl_eng-Latn)": 83.14 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "rubert-tiny-turbo", - "AmazonPolarityClassification": 68.36, - "Banking77Classification": 59.86, - "EmotionClassification": 29.5, - "GeoreviewClassification (rus-Cyrl)": 41.36, - "HeadlineClassification (rus-Cyrl)": 68.9, - "ImdbClassification": 58.36, - "InappropriatenessClassification (rus-Cyrl)": 59.11, - "KinopoiskClassification (rus-Cyrl)": 50.47, - "MassiveIntentClassification (cmo-Hans)": 5.21, - "MassiveIntentClassification (kor-Kore)": 2.53, - "MassiveIntentClassification (hin-Deva)": 2.56, - "MassiveIntentClassification (kan-Knda)": 2.06, - "MassiveIntentClassification (kat-Geor)": 2.64, - "MassiveIntentClassification (amh-Ethi)": 2.28, - "MassiveIntentClassification (mya-Mymr)": 3.96, - "MassiveIntentClassification (ell-Grek)": 9.66, - "MassiveIntentClassification (lav-Latn)": 22.32, - "MassiveIntentClassification (mal-Mlym)": 2.39, - "MassiveIntentClassification (mon-Cyrl)": 28.99, - "MassiveIntentClassification (urd-Arab)": 2.45, - "MassiveIntentClassification (fas-Arab)": 3.34, - "MassiveIntentClassification (ron-Latn)": 31.72, - "MassiveIntentClassification (isl-Latn)": 24.85, - "MassiveIntentClassification (en)": 50.16, - "MassiveIntentClassification (hun-Latn)": 25.52, - "MassiveIntentClassification (fra-Latn)": 31.51, - "MassiveIntentClassification (tha-Thai)": 3.74, - "MassiveIntentClassification (deu-Latn)": 32.1, - "MassiveIntentClassification (tur-Latn)": 27.56, - "MassiveIntentClassification (por-Latn)": 34.35, - "MassiveIntentClassification (sqi-Latn)": 32.38, - "MassiveIntentClassification (cmo-Hant)": 6.81, - "MassiveIntentClassification (hye-Armn)": 2.72, - "MassiveIntentClassification (dan-Latn)": 33.95, - "MassiveIntentClassification (afr-Latn)": 30.4, - "MassiveIntentClassification (ara-Arab)": 3.8, - "MassiveIntentClassification (jav-Latn)": 28.53, - "MassiveIntentClassification (tel-Telu)": 2.21, - "MassiveIntentClassification (tgl-Latn)": 32.02, - "MassiveIntentClassification (swa-Latn)": 27.79, - "MassiveIntentClassification (jpn-Jpan)": 5.61, - "MassiveIntentClassification (msa-Latn)": 28.94, - "MassiveIntentClassification (nob-Latn)": 32.3, - "MassiveIntentClassification (fin-Latn)": 31.13, - "MassiveIntentClassification (ind-Latn)": 33.56, - "MassiveIntentClassification (cym-Latn)": 31.68, - "MassiveIntentClassification (slv-Latn)": 31.39, - "MassiveIntentClassification (spa-Latn)": 31.03, - "MassiveIntentClassification (ben-Beng)": 3.08, - "MassiveIntentClassification (swe-Latn)": 30.23, - "MassiveIntentClassification (rus-Cyrl)": 57.98, - "MassiveIntentClassification (aze-Latn)": 23.58, - "MassiveIntentClassification (ita-Latn)": 35.24, - "MassiveIntentClassification (pol-Latn)": 26.82, - "MassiveIntentClassification (vie-Latn)": 23.72, - "MassiveIntentClassification (tam-Taml)": 1.5, - "MassiveIntentClassification (heb-Hebr)": 2.25, - "MassiveIntentClassification (nld-Latn)": 32.44, - "MassiveIntentClassification (khm-Khmr)": 5.14, - "MassiveScenarioClassification (cmo-Hans)": 10.6, - "MassiveScenarioClassification (kor-Kore)": 5.63, - "MassiveScenarioClassification (hin-Deva)": 7.41, - "MassiveScenarioClassification (kan-Knda)": 7.6, - "MassiveScenarioClassification (kat-Geor)": 7.01, - "MassiveScenarioClassification (amh-Ethi)": 7.68, - "MassiveScenarioClassification (mya-Mymr)": 10.73, - "MassiveScenarioClassification (ell-Grek)": 17.95, - "MassiveScenarioClassification (lav-Latn)": 29.29, - "MassiveScenarioClassification (mal-Mlym)": 6.92, - "MassiveScenarioClassification (mon-Cyrl)": 33.7, - "MassiveScenarioClassification (urd-Arab)": 8.53, - "MassiveScenarioClassification (fas-Arab)": 6.62, - "MassiveScenarioClassification (ron-Latn)": 40.02, - "MassiveScenarioClassification (isl-Latn)": 33.1, - "MassiveScenarioClassification (en)": 61.29, - "MassiveScenarioClassification (hun-Latn)": 36.41, - "MassiveScenarioClassification (fra-Latn)": 42.9, - "MassiveScenarioClassification (tha-Thai)": 8.26, - "MassiveScenarioClassification (deu-Latn)": 42.07, - "MassiveScenarioClassification (tur-Latn)": 34.85, - "MassiveScenarioClassification (por-Latn)": 40.79, - "MassiveScenarioClassification (sqi-Latn)": 42.66, - "MassiveScenarioClassification (cmo-Hant)": 11.93, - "MassiveScenarioClassification (hye-Armn)": 8.78, - "MassiveScenarioClassification (dan-Latn)": 43.69, - "MassiveScenarioClassification (afr-Latn)": 40.84, - "MassiveScenarioClassification (ara-Arab)": 11.86, - "MassiveScenarioClassification (jav-Latn)": 37.23, - "MassiveScenarioClassification (tel-Telu)": 6.91, - "MassiveScenarioClassification (tgl-Latn)": 38.16, - "MassiveScenarioClassification (swa-Latn)": 35.66, - "MassiveScenarioClassification (jpn-Jpan)": 10.6, - "MassiveScenarioClassification (msa-Latn)": 38.97, - "MassiveScenarioClassification (nob-Latn)": 39.05, - "MassiveScenarioClassification (fin-Latn)": 35.19, - "MassiveScenarioClassification (ind-Latn)": 39.54, - "MassiveScenarioClassification (cym-Latn)": 39.85, - "MassiveScenarioClassification (slv-Latn)": 35.98, - "MassiveScenarioClassification (spa-Latn)": 37.13, - "MassiveScenarioClassification (ben-Beng)": 8.85, - "MassiveScenarioClassification (swe-Latn)": 36.12, - "MassiveScenarioClassification (rus-Cyrl)": 62.9, - "MassiveScenarioClassification (aze-Latn)": 30.32, - "MassiveScenarioClassification (ita-Latn)": 42.69, - "MassiveScenarioClassification (pol-Latn)": 31.62, - "MassiveScenarioClassification (vie-Latn)": 31.89, - "MassiveScenarioClassification (tam-Taml)": 7.01, - "MassiveScenarioClassification (heb-Hebr)": 7.61, - "MassiveScenarioClassification (nld-Latn)": 40.94, - "MassiveScenarioClassification (khm-Khmr)": 8.51, - "RuReviewsClassification (rus-Cyrl)": 60.66, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.93, - "RuSciBenchOECDClassification (rus-Cyrl)": 40.79, - "ToxicConversationsClassification": 57.77, - "TweetSentimentExtractionClassification": 55.3 + "Model": "voyage-large-2-instruct", + "AmazonCounterfactualClassification": 77.6, + "AmazonPolarityClassification": 96.58, + "AmazonReviewsClassification": 50.77, + "Banking77Classification": 86.96, + "EmotionClassification": 59.81, + "ImdbClassification": 96.13, + "MTOPDomainClassification": 98.86, + "MTOPIntentClassification": 86.97, + "MassiveIntentClassification": 81.08, + "MassiveScenarioClassification": 87.95, + "ToxicConversationsClassification": 83.58, + "TweetSentimentExtractionClassification": 71.55 } ] }, "Clustering": { "v_measure": [ { - "Model": "rubert-tiny-turbo", - "ArxivClusteringP2P": 24.83, - "ArxivClusteringS2S": 16.68, - "BiorxivClusteringP2P": 20.0, - "BiorxivClusteringS2S": 12.67, - "GeoreviewClusteringP2P (rus-Cyrl)": 59.71, - "MLSUMClusteringP2P (rus-Cyrl)": 40.02, - "MLSUMClusteringS2S (rus-Cyrl)": 41.36, - "MedrxivClusteringP2P": 20.79, - "MedrxivClusteringS2S": 18.18, - "RedditClustering": 26.28, - "RedditClusteringP2P": 40.48, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 47.55, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.44, - "StackExchangeClustering": 33.51, - "StackExchangeClusteringP2P": 27.98, - "TwentyNewsgroupsClustering": 19.9 + "Model": "voyage-large-2-instruct", + "ArxivClusteringP2P": 51.81, + "ArxivClusteringS2S": 44.73, + "BiorxivClusteringP2P": 46.07, + "BiorxivClusteringS2S": 40.64, + "MedrxivClusteringP2P": 42.94, + "MedrxivClusteringS2S": 41.44, + "RedditClustering": 68.5, + "RedditClusteringP2P": 64.86, + "StackExchangeClustering": 74.16, + "StackExchangeClusteringP2P": 45.1, + "TwentyNewsgroupsClustering": 66.62 } ] }, "PairClassification": { "max_ap": [ { - "Model": "rubert-tiny-turbo", - "OpusparcusPC (rus-Cyrl)": 87.58, - "TERRa (rus-Cyrl)": 56.09 + "Model": "voyage-large-2-instruct", + "SprintDuplicateQuestions": 94.5, + "TwitterSemEval2015": 86.32, + "TwitterURLCorpus": 86.9 }, { - "Model": "rubert-tiny-turbo", - "OpusparcusPC (rus-Cyrl)": 87.58, - "TERRa (rus-Cyrl)": 56.27 + "Model": "voyage-large-2-instruct", + "SprintDuplicateQuestions": 94.53, + "TwitterSemEval2015": 86.32, + "TwitterURLCorpus": 86.9 } ] }, "Reranking": { "map": [ { - "Model": "rubert-tiny-turbo", - "MIRACLReranking (rus-Cyrl)": 47.73 - }, - { - "Model": "rubert-tiny-turbo", - "RuBQReranking (rus-Cyrl)": 62.15 + "Model": "voyage-large-2-instruct", + "AskUbuntuDupQuestions": 64.92, + "MindSmallReranking": 30.97, + "SciDocsRR": 89.34, + "StackOverflowDupQuestions": 55.11 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "rubert-tiny-turbo", - "AILACasedocs": 7.43, - "AILAStatutes": 13.62, - "ARCChallenge": 3.85, - "AlphaNLI": 14.15, - "ArguAna": 32.03, - "ClimateFEVER": 5.56, - "DBPedia": 9.61, - "MIRACLRetrieval (rus-Cyrl)": 37.07, - "RiaNewsRetrieval (rus-Cyrl)": 51.27, - "RuBQRetrieval (rus-Cyrl)": 51.73 + "Model": "voyage-large-2-instruct", + "ArguAna": 64.06, + "BrightRetrieval (theoremqa_questions)": 26.06, + "BrightRetrieval (earth_science)": 25.09, + "BrightRetrieval (leetcode)": 30.6, + "BrightRetrieval (economics)": 19.85, + "BrightRetrieval (robotics)": 11.21, + "BrightRetrieval (psychology)": 24.79, + "BrightRetrieval (aops)": 7.45, + "BrightRetrieval (sustainable_living)": 15.58, + "BrightRetrieval (pony)": 1.48, + "BrightRetrieval (theoremqa_theorems)": 11.1, + "BrightRetrieval (biology)": 23.55, + "BrightRetrieval (stackoverflow)": 15.03, + "CQADupstackRetrieval": 46.6, + "ClimateFEVER": 32.65, + "DBPedia": 46.03, + "FEVER": 91.47, + "FiQA2018": 59.76, + "HotpotQA": 70.86, + "MSMARCO": 40.6, + "NFCorpus": 40.32, + "NQ": 65.92, + "QuoraRetrieval": 87.4, + "SCIDOCS": 24.32, + "SciFact": 79.99, + "TRECCOVID": 85.07, + "Touche2020": 39.16 + } + ], + "recall_at_1": [ + { + "Model": "voyage-large-2-instruct", + "BrightRetrieval (psychology)": 41.58, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (biology)": 34.38, + "BrightRetrieval (stackoverflow)": 13.68, + "BrightRetrieval (pony)": 1.28, + "BrightRetrieval (economics)": 26.7, + "BrightRetrieval (sustainable_living)": 31.1, + "BrightRetrieval (earth_science)": 35.35 } ] }, "STS": { "cosine_spearman": [ { - "Model": "rubert-tiny-turbo", - "RUParaPhraserSTS (rus-Cyrl)": 72.15, - "RuSTSBenchmarkSTS (rus-Cyrl)": 78.48, - "STS22 (cmn-Hans)": 32.83, - "STS22 (deu-Latn_fra-Latn)": 17.5, - "STS22 (pol-Latn_eng-Latn)": 42.08, - "STS22 (rus-Cyrl)": 60.06, - "STS22 (fra-Latn)": 42.0, - "STS22 (deu-Latn)": 8.16, - "STS22 (tur-Latn)": 15.46, - "STS22 (deu-Latn_eng-Latn)": 21.55, - "STS22 (ita-Latn)": 39.69, - "STS22 (pol-Latn)": 9.71, - "STS22 (fra-Latn_pol-Latn)": 39.44, - "STS22 (deu-Latn_pol-Latn)": 25.53, - "STS22 (ara-Arab)": 27.95, - "STS22 (spa-Latn_eng-Latn)": 42.77, - "STS22 (spa-Latn_ita-Latn)": 32.83, - "STS22 (spa-Latn)": 45.31, - "STS22 (cmn-Hans_eng-Latn)": 31.25, - "STS22 (en)": 47.06, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 78.12 + "Model": "voyage-large-2-instruct", + "BIOSSES": 89.24, + "SICK-R": 83.16, + "STS12": 73.34, + "STS13": 88.49, + "STS14": 86.49, + "STS15": 91.13, + "STS16": 85.68, + "STS17": 90.06, + "STS22": 66.32, + "STSBenchmark": 89.22 } ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [ + "cosine_spearman": [ { - "Model": "rubert-tiny-turbo", - "CEDRClassification (rus-Cyrl)": 38.95, - "SensitiveTopicsClassification (rus-Cyrl)": 24.44 + "Model": "voyage-large-2-instruct", + "SummEval": 30.84 } ] }, + "MultilabelClassification": { + "accuracy": [] + }, "InstructionRetrieval": { "p-MRR": [] } }, - "FollowIR-7B": { + "voyage-law-2": { "BitextMining": { "f1": [] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-law-2", + "AmazonReviewsClassification": 41.98, + "MTOPDomainClassification": 90.12, + "MTOPIntentClassification": 62.44, + "MasakhaNEWSClassification": 76.42, + "MassiveIntentClassification": 66.94, + "MassiveScenarioClassification": 72.78 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "voyage-law-2", + "AlloProfClusteringP2P": 62.5, + "AlloProfClusteringS2S": 44.28, + "HALClusteringS2S": 26.36, + "MLSUMClusteringP2P": 44.03, + "MLSUMClusteringS2S": 42.95, + "MasakhaNEWSClusteringP2P": 50.68, + "MasakhaNEWSClusteringS2S": 38.79 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "voyage-law-2", + "OpusparcusPC": 93.06, + "PawsXPairClassification": 61.54 + }, + { + "Model": "voyage-law-2", + "OpusparcusPC": 93.06, + "PawsXPairClassification": 61.54 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "voyage-law-2", + "AlloprofReranking": 72.92, + "SyntecReranking": 91.2 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "voyage-law-2", + "AILACasedocs": 44.56, + "AILAStatutes": 45.51, + "AlloprofRetrieval": 57.28, + "BSARDRetrieval": 11.83, + "GerDaLIRSmall": 44.91, + "LEMBNarrativeQARetrieval": 55.78, + "LEMBQMSumRetrieval": 57.26, + "LEMBSummScreenFDRetrieval": 98.72, + "LEMBWikimQARetrieval": 87.08, + "LeCaRDv2": 72.75, + "LegalBenchConsumerContractsQA": 83.27, + "LegalBenchCorporateLobbying": 95.66, + "LegalQuAD": 67.47, + "LegalSummarization": 68.96, + "MintakaRetrieval": 34.92, + "SyntecRetrieval": 87.33, + "XPQARetrieval": 73.56 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-law-2", + "SICKFr": 74.09, + "STS22": 83.75, + "STSBenchmarkMultilingualSTS": 83.02 + } + ] }, "Summarization": { - "cosine_spearman": [] - }, - "MultilabelClassification": { - "accuracy": [] - }, - "InstructionRetrieval": { - "p-MRR": [ + "cosine_spearman": [ { - "Model": "FollowIR-7B", - "Core17InstructionRetrieval": 16.48, - "News21InstructionRetrieval": 6.26, - "Robust04InstructionRetrieval": 13.72 + "Model": "voyage-law-2", + "SummEvalFr": 30.34 } ] + }, + "MultilabelClassification": { + "accuracy": [] + }, + "InstructionRetrieval": { + "p-MRR": [] } }, - "mistral-embed": { + "voyage-lite-01-instruct": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "mistral-embed", - "AmazonReviewsClassification (fr)": 41.59, - "MTOPDomainClassification (fr)": 90.05, - "MTOPIntentClassification (fr)": 66.09, - "MasakhaNEWSClassification (fra)": 81.4, - "MassiveIntentClassification (fr)": 62.83, - "MassiveScenarioClassification (fr)": 69.71 + "Model": "voyage-lite-01-instruct", + "AmazonCounterfactualClassification": 71.43, + "AmazonPolarityClassification": 96.41, + "AmazonReviewsClassification": 57.06, + "Banking77Classification": 81.64, + "EmotionClassification": 48.29, + "ImdbClassification": 95.49, + "MTOPDomainClassification": 96.3, + "MTOPIntentClassification": 67.93, + "MassiveIntentClassification": 71.29, + "MassiveScenarioClassification": 76.74, + "ToxicConversationsClassification": 75.45, + "TweetSentimentExtractionClassification": 59.44 } ] }, "Clustering": { "v_measure": [ { - "Model": "mistral-embed", - "AlloProfClusteringP2P": 62.01, - "AlloProfClusteringS2S": 49.2, - "HALClusteringS2S": 26.17, - "MLSUMClusteringP2P": 45.28, - "MLSUMClusteringS2S": 42.74, - "MasakhaNEWSClusteringP2P (fra)": 48.13, - "MasakhaNEWSClusteringS2S (fra)": 39.62 + "Model": "voyage-lite-01-instruct", + "ArxivClusteringP2P": 47.92, + "ArxivClusteringS2S": 42.42, + "BiorxivClusteringP2P": 38.72, + "BiorxivClusteringS2S": 36.6, + "MedrxivClusteringP2P": 34.04, + "MedrxivClusteringS2S": 32.81, + "RedditClustering": 61.56, + "RedditClusteringP2P": 65.35, + "StackExchangeClustering": 70.16, + "StackExchangeClusteringP2P": 38.23, + "TwentyNewsgroupsClustering": 53.56 } ] }, "PairClassification": { "max_ap": [ { - "Model": "mistral-embed", - "OpusparcusPC (fr)": 92.61, - "PawsXPairClassification (fr)": 62.02 + "Model": "voyage-lite-01-instruct", + "SprintDuplicateQuestions": 96.01, + "TwitterSemEval2015": 76.87, + "TwitterURLCorpus": 86.84 }, { - "Model": "mistral-embed", - "OpusparcusPC (fr)": 92.64, - "PawsXPairClassification (fr)": 62.05 + "Model": "voyage-lite-01-instruct", + "SprintDuplicateQuestions": 96.01, + "TwitterSemEval2015": 76.87, + "TwitterURLCorpus": 86.84 } ] }, "Reranking": { "map": [ { - "Model": "mistral-embed", - "AlloprofReranking": 72.36, - "SyntecReranking": 88.57 + "Model": "voyage-lite-01-instruct", + "AskUbuntuDupQuestions": 65.77, + "MindSmallReranking": 31.69, + "SciDocsRR": 87.03, + "StackOverflowDupQuestions": 54.49 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "mistral-embed", - "AILACasedocs": 38.2, - "AILAStatutes": 44.81, - "AlloprofRetrieval": 56.84, - "BSARDRetrieval": 2.48, - "GerDaLIRSmall": 17.85, - "LeCaRDv2": 61.12, - "LegalBenchConsumerContractsQA": 80.8, - "LegalBenchCorporateLobbying": 94.11, - "LegalQuAD": 47.17, - "LegalSummarization": 67.39, - "MintakaRetrieval (fr)": 21.73, - "SyntecRetrieval": 78.77, - "XPQARetrieval (fr)": 74.24 + "Model": "voyage-lite-01-instruct", + "ArguAna": 58.73, + "CQADupstackRetrieval": 45.11, + "ClimateFEVER": 37.47, + "DBPedia": 43.42, + "FEVER": 89.71, + "FiQA2018": 44.79, + "HotpotQA": 70.46, + "MSMARCO": 39.66, + "NFCorpus": 43.33, + "NQ": 60.65, + "QuoraRetrieval": 87.83, + "SCIDOCS": 23.19, + "SciFact": 73.64, + "TRECCOVID": 78.92, + "Touche2020": 36.83 } ] }, "STS": { "cosine_spearman": [ { - "Model": "mistral-embed", - "SICKFr": 76.21, - "STS22 (fr)": 82.74, - "STSBenchmarkMultilingualSTS (fr)": 79.72 + "Model": "voyage-lite-01-instruct", + "BIOSSES": 84.85, + "SICK-R": 79.71, + "STS12": 77.09, + "STS13": 88.91, + "STS14": 82.08, + "STS15": 89.21, + "STS16": 84.74, + "STS17": 90.73, + "STS22": 62.1, + "STSBenchmark": 89.86 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "mistral-embed", - "SummEvalFr": 31.47 + "Model": "voyage-lite-01-instruct", + "SummEval": 30.97 } ] }, @@ -23544,94 +22057,120 @@ "p-MRR": [] } }, - "Baichuan-text-embedding": { + "voyage-lite-02-instruct": { "BitextMining": { "f1": [] }, "Classification": { "accuracy": [ { - "Model": "Baichuan-text-embedding", - "AmazonReviewsClassification (zh)": 48.3, - "IFlyTek": 50.75, - "JDReview": 87.69, - "MassiveIntentClassification (zh-CN)": 74.91, - "MassiveScenarioClassification (zh-CN)": 81.28, - "MultilingualSentiment": 76.83, - "OnlineShopping": 94.42, - "TNews": 52.62, - "Waimai": 88.77 + "Model": "voyage-lite-02-instruct", + "AmazonCounterfactualClassification": 88.31, + "AmazonPolarityClassification": 96.32, + "AmazonReviewsClassification": 56.25, + "Banking77Classification": 88.59, + "EmotionClassification": 50.28, + "ImdbClassification": 95.75, + "MTOPDomainClassification": 97.65, + "MTOPIntentClassification": 75.16, + "MassiveIntentClassification": 73.97, + "MassiveScenarioClassification": 83.99, + "ToxicConversationsClassification": 81.75, + "TweetSentimentExtractionClassification": 62.98 } ] }, "Clustering": { "v_measure": [ { - "Model": "Baichuan-text-embedding", - "CLSClusteringP2P": 60.37, - "CLSClusteringS2S": 51.09, - "ThuNewsClusteringP2P": 58.23, - "ThuNewsClusteringS2S": 57.83 + "Model": "voyage-lite-02-instruct", + "ArxivClusteringP2P": 51.95, + "ArxivClusteringS2S": 42.48, + "BiorxivClusteringP2P": 50.15, + "BiorxivClusteringS2S": 42.84, + "MedrxivClusteringP2P": 47.24, + "MedrxivClusteringS2S": 43.48, + "RedditClustering": 63.73, + "RedditClusteringP2P": 64.09, + "StackExchangeClustering": 70.71, + "StackExchangeClusteringP2P": 40.34, + "TwentyNewsgroupsClustering": 59.56 } ] }, "PairClassification": { "max_ap": [ { - "Model": "Baichuan-text-embedding", - "Cmnli": 85.31, - "Ocnli": 79.33 + "Model": "voyage-lite-02-instruct", + "SprintDuplicateQuestions": 98.07, + "TwitterSemEval2015": 74.44, + "TwitterURLCorpus": 88.11 }, { - "Model": "Baichuan-text-embedding", - "Cmnli": 85.33, - "Ocnli": 79.37 + "Model": "voyage-lite-02-instruct", + "SprintDuplicateQuestions": 98.07, + "TwitterSemEval2015": 74.44, + "TwitterURLCorpus": 88.11 } ] }, "Reranking": { "map": [ { - "Model": "Baichuan-text-embedding", - "CMedQAv1": 88.06, - "CMedQAv2": 88.46, - "MMarcoReranking": 34.3, - "T2Reranking": 67.85 + "Model": "voyage-lite-02-instruct", + "AskUbuntuDupQuestions": 63.24, + "MindSmallReranking": 31.48, + "SciDocsRR": 84.68, + "StackOverflowDupQuestions": 53.56 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "Baichuan-text-embedding", - "CmedqaRetrieval": 47.64, - "CovidRetrieval": 86.86, - "DuRetrieval": 88.43, - "EcomRetrieval": 66.39, - "MMarcoRetrieval": 80.17, - "MedicalRetrieval": 61.1, - "T2Retrieval": 80.11, - "VideoRetrieval": 74.28 + "Model": "voyage-lite-02-instruct", + "ArguAna": 70.28, + "CQADupstackRetrieval": 46.2, + "ClimateFEVER": 31.95, + "DBPedia": 39.79, + "FEVER": 91.35, + "FiQA2018": 52.51, + "HotpotQA": 75.51, + "MSMARCO": 37.93, + "NFCorpus": 43.7, + "NQ": 64.26, + "QuoraRetrieval": 87.62, + "SCIDOCS": 20.24, + "SciFact": 79.91, + "TRECCOVID": 81.02, + "Touche2020": 26.8 } ] }, "STS": { "cosine_spearman": [ { - "Model": "Baichuan-text-embedding", - "AFQMC": 50.8, - "ATEC": 53.23, - "BQ": 66.49, - "LCQMC": 76.6, - "PAWSX": 47.56, - "QBQTC": 39.96, - "STS22 (zh)": 65.78, - "STSB": 80.14 + "Model": "voyage-lite-02-instruct", + "BIOSSES": 89.7, + "SICK-R": 78.44, + "STS12": 86.46, + "STS13": 87.76, + "STS14": 86.6, + "STS15": 90.1, + "STS16": 86.39, + "STS17": 86.98, + "STS22": 76.89, + "STSBenchmark": 88.56 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-lite-02-instruct", + "SummEval": 31.01 + } + ] }, "MultilabelClassification": { "accuracy": [] @@ -23640,942 +22179,196 @@ "p-MRR": [] } }, - "multilingual-e5-large": { + "voyage-multilingual-2": { "BitextMining": { - "f1": [ - { - "Model": "multilingual-e5-large", - "BornholmBitextMining (dan-Latn)": 29.87, - "BornholmBitextMining": 44.16, - "Tatoeba (bre-Latn_eng-Latn)": 11.1, - "Tatoeba (oci-Latn_eng-Latn)": 54.91, - "Tatoeba (orv-Cyrl_eng-Latn)": 39.87, - "Tatoeba (tur-Latn_eng-Latn)": 96.27, - "Tatoeba (afr-Latn_eng-Latn)": 90.22, - "Tatoeba (dtp-Latn_eng-Latn)": 7.03, - "Tatoeba (glg-Latn_eng-Latn)": 93.34, - "Tatoeba (sqi-Latn_eng-Latn)": 94.7, - "Tatoeba (gla-Latn_eng-Latn)": 59.0, - "Tatoeba (heb-Hebr_eng-Latn)": 86.61, - "Tatoeba (mal-Mlym_eng-Latn)": 97.7, - "Tatoeba (yid-Hebr_eng-Latn)": 76.33, - "Tatoeba (nob-Latn_eng-Latn)": 97.2, - "Tatoeba (tha-Thai_eng-Latn)": 95.38, - "Tatoeba (ind-Latn_eng-Latn)": 92.9, - "Tatoeba (isl-Latn_eng-Latn)": 92.09, - "Tatoeba (ces-Latn_eng-Latn)": 94.89, - "Tatoeba (uig-Arab_eng-Latn)": 72.17, - "Tatoeba (rus-Cyrl_eng-Latn)": 92.32, - "Tatoeba (zsm-Latn_eng-Latn)": 94.53, - "Tatoeba (war-Latn_eng-Latn)": 62.02, - "Tatoeba (jpn-Jpan_eng-Latn)": 95.28, - "Tatoeba (hin-Deva_eng-Latn)": 94.48, - "Tatoeba (mkd-Cyrl_eng-Latn)": 85.63, - "Tatoeba (vie-Latn_eng-Latn)": 97.0, - "Tatoeba (bos-Latn_eng-Latn)": 92.86, - "Tatoeba (arq-Arab_eng-Latn)": 41.56, - "Tatoeba (cha-Latn_eng-Latn)": 27.16, - "Tatoeba (ell-Grek_eng-Latn)": 93.88, - "Tatoeba (hye-Armn_eng-Latn)": 90.92, - "Tatoeba (kaz-Cyrl_eng-Latn)": 79.67, - "Tatoeba (xho-Latn_eng-Latn)": 80.87, - "Tatoeba (arz-Arab_eng-Latn)": 74.73, - "Tatoeba (fin-Latn_eng-Latn)": 95.44, - "Tatoeba (gle-Latn_eng-Latn)": 71.48, - "Tatoeba (ile-Latn_eng-Latn)": 79.16, - "Tatoeba (ber-Tfng_eng-Latn)": 38.9, - "Tatoeba (mon-Cyrl_eng-Latn)": 87.53, - "Tatoeba (aze-Latn_eng-Latn)": 87.61, - "Tatoeba (srp-Cyrl_eng-Latn)": 93.1, - "Tatoeba (tzl-Latn_eng-Latn)": 53.16, - "Tatoeba (dsb-Latn_eng-Latn)": 48.44, - "Tatoeba (pol-Latn_eng-Latn)": 96.6, - "Tatoeba (eus-Latn_eng-Latn)": 77.82, - "Tatoeba (nov-Latn_eng-Latn)": 71.62, - "Tatoeba (tuk-Latn_eng-Latn)": 33.15, - "Tatoeba (ukr-Cyrl_eng-Latn)": 93.32, - "Tatoeba (est-Latn_eng-Latn)": 85.03, - "Tatoeba (deu-Latn_eng-Latn)": 99.07, - "Tatoeba (ido-Latn_eng-Latn)": 83.52, - "Tatoeba (cym-Latn_eng-Latn)": 76.21, - "Tatoeba (ara-Arab_eng-Latn)": 85.48, - "Tatoeba (csb-Latn_eng-Latn)": 36.98, - "Tatoeba (cbk-Latn_eng-Latn)": 69.26, - "Tatoeba (ben-Beng_eng-Latn)": 83.02, - "Tatoeba (slk-Latn_eng-Latn)": 93.13, - "Tatoeba (fao-Latn_eng-Latn)": 72.62, - "Tatoeba (pam-Latn_eng-Latn)": 9.32, - "Tatoeba (hsb-Latn_eng-Latn)": 58.9, - "Tatoeba (lat-Latn_eng-Latn)": 53.37, - "Tatoeba (nno-Latn_eng-Latn)": 91.4, - "Tatoeba (gsw-Latn_eng-Latn)": 51.65, - "Tatoeba (cat-Latn_eng-Latn)": 91.03, - "Tatoeba (urd-Arab_eng-Latn)": 89.21, - "Tatoeba (kzj-Latn_eng-Latn)": 7.91, - "Tatoeba (kor-Hang_eng-Latn)": 90.65, - "Tatoeba (slv-Latn_eng-Latn)": 89.57, - "Tatoeba (ast-Latn_eng-Latn)": 81.76, - "Tatoeba (cmn-Hans_eng-Latn)": 95.28, - "Tatoeba (cor-Latn_eng-Latn)": 6.28, - "Tatoeba (tel-Telu_eng-Latn)": 91.34, - "Tatoeba (kab-Latn_eng-Latn)": 36.54, - "Tatoeba (yue-Hant_eng-Latn)": 88.71, - "Tatoeba (swe-Latn_eng-Latn)": 95.3, - "Tatoeba (pes-Arab_eng-Latn)": 92.14, - "Tatoeba (hun-Latn_eng-Latn)": 94.01, - "Tatoeba (tgl-Latn_eng-Latn)": 92.0, - "Tatoeba (pms-Latn_eng-Latn)": 59.85, - "Tatoeba (lvs-Latn_eng-Latn)": 90.06, - "Tatoeba (swh-Latn_eng-Latn)": 71.61, - "Tatoeba (uzb-Latn_eng-Latn)": 72.35, - "Tatoeba (por-Latn_eng-Latn)": 93.63, - "Tatoeba (ron-Latn_eng-Latn)": 94.87, - "Tatoeba (nds-Latn_eng-Latn)": 69.28, - "Tatoeba (fry-Latn_eng-Latn)": 63.43, - "Tatoeba (khm-Khmr_eng-Latn)": 59.96, - "Tatoeba (nld-Latn_eng-Latn)": 96.63, - "Tatoeba (lit-Latn_eng-Latn)": 88.48, - "Tatoeba (awa-Deva_eng-Latn)": 72.27, - "Tatoeba (amh-Ethi_eng-Latn)": 80.69, - "Tatoeba (jav-Latn_eng-Latn)": 75.46, - "Tatoeba (mar-Deva_eng-Latn)": 88.58, - "Tatoeba (spa-Latn_eng-Latn)": 97.1, - "Tatoeba (lfn-Latn_eng-Latn)": 62.91, - "Tatoeba (ceb-Latn_eng-Latn)": 55.31, - "Tatoeba (bul-Cyrl_eng-Latn)": 92.93, - "Tatoeba (tat-Cyrl_eng-Latn)": 73.51, - "Tatoeba (kur-Latn_eng-Latn)": 66.83, - "Tatoeba (mhr-Cyrl_eng-Latn)": 6.79, - "Tatoeba (epo-Latn_eng-Latn)": 96.01, - "Tatoeba (kat-Geor_eng-Latn)": 84.09, - "Tatoeba (ina-Latn_eng-Latn)": 93.47, - "Tatoeba (tam-Taml_eng-Latn)": 88.23, - "Tatoeba (ita-Latn_eng-Latn)": 93.29, - "Tatoeba (hrv-Latn_eng-Latn)": 96.15, - "Tatoeba (fra-Latn_eng-Latn)": 93.42, - "Tatoeba (wuu-Hans_eng-Latn)": 86.37, - "Tatoeba (dan-Latn_eng-Latn)": 95.08, - "Tatoeba (max-Deva_eng-Latn)": 63.41, - "Tatoeba (ang-Latn_eng-Latn)": 40.18, - "Tatoeba (bel-Cyrl_eng-Latn)": 91.08, - "Tatoeba (swg-Latn_eng-Latn)": 55.64 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "multilingual-e5-large", - "AllegroReviews (pol-Latn)": 41.04, - "AllegroReviews": 41.14, - "AmazonCounterfactualClassification (en-ext)": 78.73, - "AmazonCounterfactualClassification (en)": 78.67, - "AmazonCounterfactualClassification (deu-Latn)": 68.66, - "AmazonCounterfactualClassification (jpn-Jpan)": 78.8, - "AmazonPolarityClassification": 93.26, - "AmazonReviewsClassification (en)": 49.2, - "AmazonReviewsClassification (deu-Latn)": 46.5, - "AmazonReviewsClassification (spa-Latn)": 44.35, - "AmazonReviewsClassification (fra-Latn)": 42.55, - "AmazonReviewsClassification (jpn-Jpan)": 41.71, - "AmazonReviewsClassification (cmn-Hans)": 38.87, - "AmazonReviewsClassification (fr)": 41.91, - "AngryTweetsClassification (dan-Latn)": 57.69, - "AngryTweetsClassification": 54.95, - "Banking77Classification": 75.88, - "CBD (pol-Latn)": 69.84, - "CBD": 69.9, - "DKHateClassification": 66.02, - "DanishPoliticalCommentsClassification (dan-Latn)": 39.43, - "DanishPoliticalCommentsClassification": 38.27, - "EmotionClassification": 47.58, - "GeoreviewClassification (rus-Cyrl)": 49.69, - "HeadlineClassification (rus-Cyrl)": 77.19, - "IFlyTek (cmn-Hans)": 41.86, - "IFlyTek": 45.47, - "ImdbClassification": 90.23, - "InappropriatenessClassification (rus-Cyrl)": 61.59, - "JDReview (cmn-Hans)": 80.54, - "JDReview": 80.99, - "KinopoiskClassification (rus-Cyrl)": 56.59, - "LccSentimentClassification (dan-Latn)": 61.53, - "LccSentimentClassification": 59.6, - "MTOPDomainClassification (en)": 91.81, - "MTOPDomainClassification (deu-Latn)": 90.44, - "MTOPDomainClassification (spa-Latn)": 88.34, - "MTOPDomainClassification (fra-Latn)": 86.23, - "MTOPDomainClassification (hin-Deva)": 86.84, - "MTOPDomainClassification (tha-Thai)": 86.88, - "MTOPDomainClassification (fr)": 86.41, - "MTOPIntentClassification (en)": 64.29, - "MTOPIntentClassification (deu-Latn)": 65.97, - "MTOPIntentClassification (spa-Latn)": 61.9, - "MTOPIntentClassification (fra-Latn)": 56.25, - "MTOPIntentClassification (hin-Deva)": 59.17, - "MTOPIntentClassification (tha-Thai)": 62.59, - "MTOPIntentClassification (fr)": 59.43, - "MasakhaNEWSClassification (amh-Ethi)": 83.7, - "MasakhaNEWSClassification (eng)": 78.26, - "MasakhaNEWSClassification (fra-Latn)": 76.11, - "MasakhaNEWSClassification (hau-Latn)": 76.17, - "MasakhaNEWSClassification (ibo-Latn)": 70.05, - "MasakhaNEWSClassification (lin-Latn)": 75.89, - "MasakhaNEWSClassification (lug-Latn)": 73.63, - "MasakhaNEWSClassification (orm-Ethi)": 80.31, - "MasakhaNEWSClassification (pcm-Latn)": 89.15, - "MasakhaNEWSClassification (run-Latn)": 76.55, - "MasakhaNEWSClassification (sna-Latn)": 86.99, - "MasakhaNEWSClassification (som-Latn)": 64.63, - "MasakhaNEWSClassification (swa-Latn)": 73.42, - "MasakhaNEWSClassification (tir-Ethi)": 72.06, - "MasakhaNEWSClassification (xho-Latn)": 82.56, - "MasakhaNEWSClassification (yor-Latn)": 81.09, - "MasakhaNEWSClassification (fra)": 79.38, - "MassiveIntentClassification (heb-Hebr)": 62.44, - "MassiveIntentClassification (ind-Latn)": 63.51, - "MassiveIntentClassification (fin-Latn)": 64.28, - "MassiveIntentClassification (hun-Latn)": 64.0, - "MassiveIntentClassification (nob-Latn)": 64.54, - "MassiveIntentClassification (vie-Latn)": 63.39, - "MassiveIntentClassification (kor-Kore)": 63.92, - "MassiveIntentClassification (tam-Taml)": 53.41, - "MassiveIntentClassification (tel-Telu)": 53.96, - "MassiveIntentClassification (dan-Latn)": 63.7, - "MassiveIntentClassification (ara-Arab)": 54.1, - "MassiveIntentClassification (en)": 68.51, - "MassiveIntentClassification (hin-Deva)": 60.93, - "MassiveIntentClassification (ben-Beng)": 55.6, - "MassiveIntentClassification (tur-Latn)": 64.61, - "MassiveIntentClassification (amh-Ethi)": 45.48, - "MassiveIntentClassification (spa-Latn)": 64.01, - "MassiveIntentClassification (lav-Latn)": 58.31, - "MassiveIntentClassification (mya-Mymr)": 49.73, - "MassiveIntentClassification (sqi-Latn)": 57.3, - "MassiveIntentClassification (tha-Thai)": 62.75, - "MassiveIntentClassification (slv-Latn)": 59.38, - "MassiveIntentClassification (mal-Mlym)": 57.58, - "MassiveIntentClassification (isl-Latn)": 53.3, - "MassiveIntentClassification (msa-Latn)": 58.49, - "MassiveIntentClassification (nld-Latn)": 65.0, - "MassiveIntentClassification (aze-Latn)": 54.68, - "MassiveIntentClassification (rus-Cyrl)": 65.76, - "MassiveIntentClassification (swe-Latn)": 66.52, - "MassiveIntentClassification (ron-Latn)": 59.76, - "MassiveIntentClassification (cmo-Hant)": 58.78, - "MassiveIntentClassification (jav-Latn)": 48.96, - "MassiveIntentClassification (fas-Arab)": 63.74, - "MassiveIntentClassification (pol-Latn)": 65.09, - "MassiveIntentClassification (khm-Khmr)": 34.88, - "MassiveIntentClassification (jpn-Jpan)": 67.11, - "MassiveIntentClassification (kan-Knda)": 53.45, - "MassiveIntentClassification (fra-Latn)": 63.37, - "MassiveIntentClassification (kat-Geor)": 41.45, - "MassiveIntentClassification (swa-Latn)": 47.69, - "MassiveIntentClassification (cmo-Hans)": 66.23, - "MassiveIntentClassification (urd-Arab)": 54.6, - "MassiveIntentClassification (tgl-Latn)": 54.77, - "MassiveIntentClassification (cym-Latn)": 44.22, - "MassiveIntentClassification (deu-Latn)": 63.82, - "MassiveIntentClassification (afr-Latn)": 53.69, - "MassiveIntentClassification (ita-Latn)": 63.89, - "MassiveIntentClassification (ell-Grek)": 64.34, - "MassiveIntentClassification (mon-Cyrl)": 49.6, - "MassiveIntentClassification (hye-Armn)": 50.89, - "MassiveIntentClassification (por-Latn)": 65.6, - "MassiveIntentClassification (da)": 60.16, - "MassiveIntentClassification (nb)": 59.83, - "MassiveIntentClassification (sv)": 61.78, - "MassiveIntentClassification (pl)": 65.07, - "MassiveScenarioClassification (en)": 73.04, - "MassiveScenarioClassification (tam-Taml)": 58.76, - "MassiveScenarioClassification (mal-Mlym)": 63.17, - "MassiveScenarioClassification (por-Latn)": 68.33, - "MassiveScenarioClassification (heb-Hebr)": 67.72, - "MassiveScenarioClassification (ara-Arab)": 61.0, - "MassiveScenarioClassification (pol-Latn)": 69.83, - "MassiveScenarioClassification (vie-Latn)": 68.91, - "MassiveScenarioClassification (msa-Latn)": 63.55, - "MassiveScenarioClassification (slv-Latn)": 65.33, - "MassiveScenarioClassification (hun-Latn)": 70.53, - "MassiveScenarioClassification (mya-Mymr)": 54.03, - "MassiveScenarioClassification (sqi-Latn)": 63.79, - "MassiveScenarioClassification (fin-Latn)": 68.62, - "MassiveScenarioClassification (tel-Telu)": 59.49, - "MassiveScenarioClassification (rus-Cyrl)": 70.85, - "MassiveScenarioClassification (amh-Ethi)": 52.69, - "MassiveScenarioClassification (hin-Deva)": 66.85, - "MassiveScenarioClassification (fra-Latn)": 68.74, - "MassiveScenarioClassification (cym-Latn)": 51.25, - "MassiveScenarioClassification (tur-Latn)": 68.12, - "MassiveScenarioClassification (ron-Latn)": 66.06, - "MassiveScenarioClassification (cmo-Hans)": 72.25, - "MassiveScenarioClassification (kan-Knda)": 59.36, - "MassiveScenarioClassification (nob-Latn)": 70.44, - "MassiveScenarioClassification (deu-Latn)": 71.25, - "MassiveScenarioClassification (ell-Grek)": 69.74, - "MassiveScenarioClassification (spa-Latn)": 69.07, - "MassiveScenarioClassification (dan-Latn)": 71.18, - "MassiveScenarioClassification (urd-Arab)": 60.89, - "MassiveScenarioClassification (afr-Latn)": 62.35, - "MassiveScenarioClassification (kor-Kore)": 70.54, - "MassiveScenarioClassification (ben-Beng)": 61.85, - "MassiveScenarioClassification (jpn-Jpan)": 73.16, - "MassiveScenarioClassification (aze-Latn)": 58.49, - "MassiveScenarioClassification (fas-Arab)": 67.55, - "MassiveScenarioClassification (swe-Latn)": 72.77, - "MassiveScenarioClassification (cmo-Hant)": 64.35, - "MassiveScenarioClassification (kat-Geor)": 47.82, - "MassiveScenarioClassification (nld-Latn)": 71.11, - "MassiveScenarioClassification (swa-Latn)": 56.27, - "MassiveScenarioClassification (hye-Armn)": 55.76, - "MassiveScenarioClassification (isl-Latn)": 60.74, - "MassiveScenarioClassification (mon-Cyrl)": 55.37, - "MassiveScenarioClassification (ita-Latn)": 69.45, - "MassiveScenarioClassification (tgl-Latn)": 60.71, - "MassiveScenarioClassification (khm-Khmr)": 41.14, - "MassiveScenarioClassification (tha-Thai)": 69.06, - "MassiveScenarioClassification (lav-Latn)": 64.28, - "MassiveScenarioClassification (jav-Latn)": 56.24, - "MassiveScenarioClassification (ind-Latn)": 69.43, - "MassiveScenarioClassification (da)": 67.46, - "MassiveScenarioClassification (nb)": 66.18, - "MassiveScenarioClassification (sv)": 69.15, - "MassiveScenarioClassification (pl)": 69.82, - "MultilingualSentiment (cmn-Hans)": 70.81, - "MultilingualSentiment": 68.58, - "NoRecClassification (nob-Latn)": 58.43, - "NoRecClassification": 62.76, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 80.15, - "NordicLangClassification": 82.29, - "NorwegianParliament": 60.36, - "OnlineShopping (cmn-Hans)": 90.45, - "OnlineShopping": 90.81, - "PAC (pol-Latn)": 70.33, - "PAC": 70.37, - "PolEmo2.0-IN (pol-Latn)": 77.06, - "PolEmo2.0-IN": 77.06, - "PolEmo2.0-OUT (pol-Latn)": 53.48, - "PolEmo2.0-OUT": 53.38, - "RuReviewsClassification (rus-Cyrl)": 65.28, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 58.2, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.91, - "ScalaDaClassification": 50.77, - "ScalaNbClassification": 50.44, - "TNews (cmn-Hans)": 48.8, - "TNews": 48.38, - "ToxicConversationsClassification": 66.01, - "TweetSentimentExtractionClassification": 62.8, - "Waimai (cmn-Hans)": 86.3, - "Waimai": 85.02 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "multilingual-e5-large", - "8TagsClustering": 33.88, - "AlloProfClusteringP2P": 62.99, - "AlloProfClusteringS2S": 32.26, - "BiorxivClusteringP2P": 35.5, - "BiorxivClusteringS2S": 33.3, - "CLSClusteringP2P": 40.68, - "CLSClusteringS2S": 38.59, - "GeoreviewClusteringP2P (rus-Cyrl)": 59.59, - "HALClusteringS2S": 22.44, - "MLSUMClusteringP2P (rus-Cyrl)": 42.79, - "MLSUMClusteringP2P": 44.04, - "MLSUMClusteringS2S (rus-Cyrl)": 44.32, - "MLSUMClusteringS2S": 37.65, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 67.16, - "MasakhaNEWSClusteringP2P (eng)": 61.1, - "MasakhaNEWSClusteringP2P (fra-Latn)": 41.66, - "MasakhaNEWSClusteringP2P (hau-Latn)": 60.7, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 48.41, - "MasakhaNEWSClusteringP2P (lin-Latn)": 57.69, - "MasakhaNEWSClusteringP2P (lug-Latn)": 71.95, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 60.14, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 80.84, - "MasakhaNEWSClusteringP2P (run-Latn)": 59.91, - "MasakhaNEWSClusteringP2P (sna-Latn)": 53.3, - "MasakhaNEWSClusteringP2P (som-Latn)": 34.38, - "MasakhaNEWSClusteringP2P (swa-Latn)": 33.25, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 54.21, - "MasakhaNEWSClusteringP2P (xho-Latn)": 41.12, - "MasakhaNEWSClusteringP2P (yor-Latn)": 36.22, - "MasakhaNEWSClusteringP2P (fra)": 40.94, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 47.24, - "MasakhaNEWSClusteringS2S (eng)": 53.93, - "MasakhaNEWSClusteringS2S (fra-Latn)": 39.84, - "MasakhaNEWSClusteringS2S (hau-Latn)": 19.24, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 28.88, - "MasakhaNEWSClusteringS2S (lin-Latn)": 42.22, - "MasakhaNEWSClusteringS2S (lug-Latn)": 43.63, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 26.29, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 59.77, - "MasakhaNEWSClusteringS2S (run-Latn)": 51.46, - "MasakhaNEWSClusteringS2S (sna-Latn)": 48.14, - "MasakhaNEWSClusteringS2S (som-Latn)": 25.14, - "MasakhaNEWSClusteringS2S (swa-Latn)": 7.28, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 50.51, - "MasakhaNEWSClusteringS2S (xho-Latn)": 30.98, - "MasakhaNEWSClusteringS2S (yor-Latn)": 34.09, - "MasakhaNEWSClusteringS2S (fra)": 30.56, - "MedrxivClusteringP2P": 31.7, - "MedrxivClusteringS2S": 29.76, - "RedditClustering": 46.91, - "RedditClusteringP2P": 63.0, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 51.98, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 45.12, - "StackExchangeClustering": 58.37, - "StackExchangeClusteringP2P": 32.9, - "ThuNewsClusteringP2P": 58.05, - "ThuNewsClusteringS2S": 55.59, - "TwentyNewsgroupsClustering": 39.4 + "Model": "voyage-multilingual-2", + "AmazonReviewsClassification": 43.36, + "MTOPDomainClassification": 90.33, + "MTOPIntentClassification": 60.52, + "MasakhaNEWSClassification": 74.81, + "MassiveIntentClassification": 68.06, + "MassiveScenarioClassification": 74.29 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "voyage-multilingual-2", + "AlloProfClusteringP2P": 65.37, + "AlloProfClusteringS2S": 47.03, + "HALClusteringS2S": 27.67, + "MLSUMClusteringP2P": 45.99, + "MLSUMClusteringS2S": 45.57, + "MasakhaNEWSClusteringP2P": 44.53, + "MasakhaNEWSClusteringS2S": 49.8 } ] }, "PairClassification": { "max_ap": [ { - "Model": "multilingual-e5-large", - "CDSC-E (pol-Latn)": 74.47, - "OpusparcusPC (deu-Latn)": 97.27, - "OpusparcusPC (en)": 98.74, - "OpusparcusPC (fin-Latn)": 94.26, - "OpusparcusPC (fra-Latn)": 93.68, - "OpusparcusPC (rus-Cyrl)": 89.64, - "OpusparcusPC (swe-Latn)": 94.98, - "PSC (pol-Latn)": 99.4, - "PawsXPairClassification (deu-Latn)": 56.81, - "PawsXPairClassification (en)": 62.97, - "PawsXPairClassification (spa-Latn)": 56.85, - "PawsXPairClassification (fra-Latn)": 58.68, - "PawsXPairClassification (jpn-Hira)": 50.7, - "PawsXPairClassification (kor-Hang)": 52.08, - "PawsXPairClassification (cmn-Hans)": 56.82, - "SICK-E-PL (pol-Latn)": 75.95, - "SprintDuplicateQuestions": 93.14, - "TERRa (rus-Cyrl)": 58.4, - "TwitterSemEval2015": 75.28, - "TwitterURLCorpus": 85.83 - }, - { - "Model": "multilingual-e5-large", - "CDSC-E (pol-Latn)": 74.47, - "CDSC-E": 74.47, - "Cmnli": 78.18, - "Ocnli": 61.6, - "OpusparcusPC (deu-Latn)": 97.27, - "OpusparcusPC (en)": 98.74, - "OpusparcusPC (fin-Latn)": 94.26, - "OpusparcusPC (fra-Latn)": 93.68, - "OpusparcusPC (rus-Cyrl)": 89.64, - "OpusparcusPC (swe-Latn)": 94.98, - "OpusparcusPC (fr)": 93.89, - "PPC": 92.18, - "PSC (pol-Latn)": 99.4, - "PSC": 99.39, - "PawsXPairClassification (deu-Latn)": 57.14, - "PawsXPairClassification (en)": 62.97, - "PawsXPairClassification (spa-Latn)": 56.87, - "PawsXPairClassification (fra-Latn)": 58.69, - "PawsXPairClassification (jpn-Hira)": 50.84, - "PawsXPairClassification (kor-Hang)": 52.22, - "PawsXPairClassification (cmn-Hans)": 56.95, - "PawsXPairClassification (fr)": 58.61, - "SICK-E-PL (pol-Latn)": 75.95, - "SICK-E-PL": 75.96, - "SprintDuplicateQuestions": 93.14, - "TERRa (rus-Cyrl)": 58.42, - "TwitterSemEval2015": 75.28, - "TwitterURLCorpus": 85.83 + "Model": "voyage-multilingual-2", + "OpusparcusPC": 93.68, + "PawsXPairClassification": 63.64 }, { - "Model": "multilingual-e5-large", - "CDSC-E": 74.47, - "Cmnli": 78.18, - "Ocnli": 61.6, - "OpusparcusPC (fr)": 93.89, - "PPC": 92.18, - "PSC": 99.39, - "PawsXPairClassification (fr)": 58.5, - "SICK-E-PL": 75.96 + "Model": "voyage-multilingual-2", + "OpusparcusPC": 93.68, + "PawsXPairClassification": 63.71 } ] }, "Reranking": { "map": [ { - "Model": "multilingual-e5-large", - "AlloprofReranking (fra-Latn)": 69.44, - "AlloprofReranking": 57.37, - "AskUbuntuDupQuestions": 59.24, - "CMedQAv1": 68.25, - "CMedQAv2": 68.56, - "MMarcoReranking (cmn-Hans)": 29.12, - "MMarcoReranking": 21.34, - "MindSmallReranking": 30.24, - "RuBQReranking (rus-Cyrl)": 75.6, - "SciDocsRR": 84.22, - "StackOverflowDupQuestions": 50.14, - "SyntecReranking (fra-Latn)": 85.45, - "SyntecReranking": 86.9, - "T2Reranking (cmn-Hans)": 66.32, - "T2Reranking": 65.83 - }, - { - "Model": "multilingual-e5-large", - "MIRACLReranking (rus-Cyrl)": 63.71 + "Model": "voyage-multilingual-2", + "AlloprofReranking": 74.78, + "SyntecReranking": 90.4 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "multilingual-e5-large", - "AILACasedocs": 26.43, - "AILAStatutes": 20.84, - "ARCChallenge": 10.83, - "AlloprofRetrieval (fra-Latn)": 39.34, - "AlloprofRetrieval": 38.15, - "AlphaNLI": 13.59, - "AppsRetrieval (eng-Latn_python-Code)": 32.55, - "ArguAna": 54.36, - "ArguAna-PL (pol-Latn)": 52.99, - "ArguAna-PL": 53.02, - "BSARDRetrieval (fra-Latn)": 21.28, - "BSARDRetrieval": 0.27, - "CmedqaRetrieval (cmn-Hans)": 28.66, - "CmedqaRetrieval": 28.67, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 42.78, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 74.26, - "CodeSearchNetCCRetrieval (python-Code)": 84.45, - "CodeSearchNetCCRetrieval (javascript-Code)": 77.67, - "CodeSearchNetCCRetrieval (go-Code)": 72.08, - "CodeSearchNetCCRetrieval (ruby-Code)": 81.94, - "CodeSearchNetCCRetrieval (java-Code)": 78.65, - "CodeSearchNetCCRetrieval (php-Code)": 72.21, - "CodeSearchNetRetrieval (python-Code)": 89.42, - "CodeSearchNetRetrieval (javascript-Code)": 75.54, - "CodeSearchNetRetrieval (go-Code)": 91.8, - "CodeSearchNetRetrieval (ruby-Code)": 81.43, - "CodeSearchNetRetrieval (java-Code)": 82.05, - "CodeSearchNetRetrieval (php-Code)": 84.5, - "CodeTransOceanContest (python-Code_c++-Code)": 74.03, - "CodeTransOceanDL": 31.28, - "CosQA (eng-Latn_python-Code)": 34.8, - "CovidRetrieval (cmn-Hans)": 75.61, - "CovidRetrieval": 75.51, - "DBPedia-PL": 35.82, - "DuRetrieval (cmn-Hans)": 85.3, - "DuRetrieval": 85.32, - "EcomRetrieval (cmn-Hans)": 54.67, - "EcomRetrieval": 54.75, - "FiQA-PL (pol-Latn)": 32.97, - "FiQA-PL": 33.0, - "FiQA2018": 43.81, - "GerDaLIRSmall (deu-Latn)": 15.72, - "HellaSwag": 27.35, - "HotpotQA-PL": 67.41, - "LEMBNarrativeQARetrieval": 24.22, - "LEMBQMSumRetrieval": 24.26, - "LEMBSummScreenFDRetrieval": 71.12, - "LEMBWikimQARetrieval": 56.8, - "LeCaRDv2 (zho-Hans)": 55.83, - "LegalBenchConsumerContractsQA": 73.3, - "LegalBenchCorporateLobbying": 89.72, - "LegalQuAD (deu-Latn)": 43.17, - "LegalSummarization": 62.1, - "MIRACLRetrieval (rus-Cyrl)": 67.33, - "MMarcoRetrieval (cmn-Hans)": 79.2, - "MMarcoRetrieval": 79.2, - "MSMARCO-PL": 33.38, - "MedicalRetrieval (cmn-Hans)": 51.44, - "MedicalRetrieval": 51.44, - "MintakaRetrieval (ara-Arab)": 26.5, - "MintakaRetrieval (deu-Latn)": 32.77, - "MintakaRetrieval (spa-Latn)": 34.23, - "MintakaRetrieval (fra-Latn)": 34.24, - "MintakaRetrieval (hin-Deva)": 27.45, - "MintakaRetrieval (ita-Latn)": 33.84, - "MintakaRetrieval (jpn-Hira)": 26.45, - "MintakaRetrieval (por-Latn)": 35.9, - "MintakaRetrieval (fr)": 25.2, - "NFCorpus": 33.95, - "NFCorpus-PL (pol-Latn)": 30.21, - "NFCorpus-PL": 30.24, - "NQ-PL": 52.79, - "PIQA": 28.82, - "Quail": 4.85, - "Quora-PL": 83.65, - "RARbCode": 58.92, - "RARbMath": 67.32, - "RiaNewsRetrieval (rus-Cyrl)": 80.67, - "RuBQRetrieval (rus-Cyrl)": 74.13, - "SCIDOCS": 17.45, - "SCIDOCS-PL (pol-Latn)": 13.82, - "SCIDOCS-PL": 13.81, - "SIQA": 5.36, - "SciFact": 70.42, - "SciFact-PL (pol-Latn)": 65.66, - "SciFact-PL": 65.66, - "SpartQA": 5.64, - "StackOverflowQA": 88.89, - "SyntecRetrieval (fra-Latn)": 82.39, - "SyntecRetrieval": 81.07, - "SyntheticText2SQL (eng-Latn_sql-Code)": 53.07, - "T2Retrieval (cmn-Hans)": 76.07, - "T2Retrieval": 76.11, - "TRECCOVID": 71.21, - "TRECCOVID-PL (pol-Latn)": 69.9, - "TRECCOVID-PL": 70.03, - "TempReasonL1": 1.14, - "TempReasonL2Fact": 42.96, - "TempReasonL2Pure": 2.05, - "TempReasonL3Fact": 38.22, - "TempReasonL3Pure": 8.31, - "Touche2020": 23.13, - "VideoRetrieval (cmn-Hans)": 58.28, - "VideoRetrieval": 58.25, - "WinoGrande": 54.99, - "XPQARetrieval (ara-Arab_ara-Arab)": 43.69, - "XPQARetrieval (eng-Latn_ara-Arab)": 30.86, - "XPQARetrieval (ara-Arab_eng-Latn)": 39.11, - "XPQARetrieval (deu-Latn_deu-Latn)": 76.83, - "XPQARetrieval (eng-Latn_deu-Latn)": 42.87, - "XPQARetrieval (deu-Latn_eng-Latn)": 68.25, - "XPQARetrieval (spa-Latn_spa-Latn)": 61.77, - "XPQARetrieval (eng-Latn_spa-Latn)": 37.55, - "XPQARetrieval (spa-Latn_eng-Latn)": 52.86, - "XPQARetrieval (fra-Latn_fra-Latn)": 61.38, - "XPQARetrieval (eng-Latn_fra-Latn)": 39.12, - "XPQARetrieval (fra-Latn_eng-Latn)": 57.93, - "XPQARetrieval (hin-Deva_hin-Deva)": 71.07, - "XPQARetrieval (eng-Latn_hin-Deva)": 32.39, - "XPQARetrieval (hin-Deva_eng-Latn)": 68.31, - "XPQARetrieval (ita-Latn_ita-Latn)": 74.32, - "XPQARetrieval (eng-Latn_ita-Latn)": 37.95, - "XPQARetrieval (ita-Latn_eng-Latn)": 64.54, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 74.11, - "XPQARetrieval (eng-Latn_jpn-Hira)": 38.31, - "XPQARetrieval (jpn-Hira_eng-Latn)": 65.42, - "XPQARetrieval (kor-Hang_kor-Hang)": 35.71, - "XPQARetrieval (eng-Latn_kor-Hang)": 31.09, - "XPQARetrieval (kor-Hang_eng-Latn)": 34.02, - "XPQARetrieval (pol-Latn_pol-Latn)": 51.01, - "XPQARetrieval (eng-Latn_pol-Latn)": 30.49, - "XPQARetrieval (pol-Latn_eng-Latn)": 44.66, - "XPQARetrieval (por-Latn_por-Latn)": 41.1, - "XPQARetrieval (eng-Latn_por-Latn)": 22.03, - "XPQARetrieval (por-Latn_eng-Latn)": 35.15, - "XPQARetrieval (tam-Taml_tam-Taml)": 39.47, - "XPQARetrieval (eng-Latn_tam-Taml)": 17.33, - "XPQARetrieval (tam-Taml_eng-Latn)": 33.67, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 66.27, - "XPQARetrieval (eng-Latn_cmn-Hans)": 26.24, - "XPQARetrieval (cmn-Hans_eng-Latn)": 55.15, - "XPQARetrieval (fr)": 66.15 - }, - { - "Model": "multilingual-e5-large", - "LEMBNeedleRetrieval": 28.0, - "LEMBPasskeyRetrieval": 38.25 + "Model": "voyage-multilingual-2", + "AlloprofRetrieval": 58.27, + "BSARDRetrieval": 5.14, + "LEMBNarrativeQARetrieval": 64.69, + "LEMBQMSumRetrieval": 51.49, + "LEMBSummScreenFDRetrieval": 99.11, + "LEMBWikimQARetrieval": 87.49, + "MintakaRetrieval": 49.19, + "SyntecRetrieval": 87.28, + "XPQARetrieval": 72.92 } ] }, "STS": { "cosine_spearman": [ { - "Model": "multilingual-e5-large", - "AFQMC (cmn-Hans)": 33.01, - "ATEC (cmn-Hans)": 39.8, - "BIOSSES": 82.49, - "BQ (cmn-Hans)": 46.44, - "CDSC-R (pol-Latn)": 91.0, - "LCQMC (cmn-Hans)": 75.95, - "PAWSX (cmn-Hans)": 14.63, - "RUParaPhraserSTS (rus-Cyrl)": 71.82, - "RuSTSBenchmarkSTS (rus-Cyrl)": 83.15, - "SICK-R": 80.23, - "SICK-R-PL (pol-Latn)": 75.08, - "SICKFr (fra-Latn)": 78.81, - "STS12": 80.02, - "STS13": 81.55, - "STS14": 77.72, - "STS15": 89.31, - "STS16": 85.79, - "STS17 (fra-Latn_eng-Latn)": 85.62, - "STS17 (nld-Latn_eng-Latn)": 85.29, - "STS17 (spa-Latn)": 86.71, - "STS17 (ara-Arab)": 77.83, - "STS17 (spa-Latn_eng-Latn)": 80.74, - "STS17 (en-en)": 88.12, - "STS17 (kor-Hang)": 82.27, - "STS17 (eng-Latn_ara-Arab)": 75.03, - "STS17 (eng-Latn_tur-Latn)": 71.22, - "STS17 (eng-Latn_deu-Latn)": 86.15, - "STS17 (ita-Latn_eng-Latn)": 84.52, - "STS22 (deu-Latn)": 56.58, - "STS22 (deu-Latn_fra-Latn)": 67.96, - "STS22 (ara-Arab)": 56.95, - "STS22 (rus-Cyrl)": 59.89, - "STS22 (deu-Latn_eng-Latn)": 56.59, - "STS22 (en)": 63.66, - "STS22 (tur-Latn)": 63.56, - "STS22 (pol-Latn_eng-Latn)": 65.54, - "STS22 (cmn-Hans)": 66.82, - "STS22 (ita-Latn)": 76.99, - "STS22 (pol-Latn)": 34.65, - "STS22 (spa-Latn_ita-Latn)": 68.92, - "STS22 (fra-Latn)": 76.77, - "STS22 (cmn-Hans_eng-Latn)": 65.95, - "STS22 (fra-Latn_pol-Latn)": 50.71, - "STS22 (spa-Latn_eng-Latn)": 72.51, - "STS22 (spa-Latn)": 64.6, - "STS22 (deu-Latn_pol-Latn)": 49.58, - "STSB (cmn-Hans)": 81.08, - "STSBenchmark": 87.29, - "STSBenchmarkMultilingualSTS (pol-Latn)": 81.06, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 83.05, - "STSBenchmarkMultilingualSTS (deu-Latn)": 84.27, - "STSBenchmarkMultilingualSTS (por-Latn)": 73.31, - "STSBenchmarkMultilingualSTS (en)": 87.29, - "STSBenchmarkMultilingualSTS (spa-Latn)": 83.81, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.22, - "STSBenchmarkMultilingualSTS (ita-Latn)": 81.75, - "STSBenchmarkMultilingualSTS (nld-Latn)": 81.63, - "STSBenchmarkMultilingualSTS (fra-Latn)": 83.28 - }, - { - "Model": "multilingual-e5-large", - "AFQMC (cmn-Hans)": 33.01, - "ATEC (cmn-Hans)": 39.8, - "BIOSSES": 82.49, - "BQ (cmn-Hans)": 46.44, - "CDSC-R (pol-Latn)": 91.0, - "LCQMC (cmn-Hans)": 75.95, - "PAWSX (cmn-Hans)": 14.63, - "RUParaPhraserSTS (rus-Cyrl)": 71.82, - "RuSTSBenchmarkSTS (rus-Cyrl)": 83.15, - "SICK-R": 80.23, - "SICK-R-PL (pol-Latn)": 75.08, - "SICKFr (fra-Latn)": 78.81, - "STS12": 80.02, - "STS13": 81.55, - "STS14": 77.72, - "STS15": 89.31, - "STS16": 85.79, - "STS17 (fra-Latn_eng-Latn)": 85.62, - "STS17 (nld-Latn_eng-Latn)": 85.29, - "STS17 (spa-Latn)": 86.71, - "STS17 (ara-Arab)": 77.83, - "STS17 (spa-Latn_eng-Latn)": 80.74, - "STS17 (en-en)": 88.12, - "STS17 (kor-Hang)": 82.27, - "STS17 (eng-Latn_ara-Arab)": 75.03, - "STS17 (eng-Latn_tur-Latn)": 71.22, - "STS17 (eng-Latn_deu-Latn)": 86.15, - "STS17 (ita-Latn_eng-Latn)": 84.52, - "STS22 (deu-Latn)": 56.58, - "STS22 (deu-Latn_fra-Latn)": 67.96, - "STS22 (ara-Arab)": 56.95, - "STS22 (rus-Cyrl)": 59.89, - "STS22 (deu-Latn_eng-Latn)": 56.59, - "STS22 (en)": 63.66, - "STS22 (tur-Latn)": 63.56, - "STS22 (pol-Latn_eng-Latn)": 65.54, - "STS22 (cmn-Hans)": 66.82, - "STS22 (ita-Latn)": 76.99, - "STS22 (pol-Latn)": 34.65, - "STS22 (spa-Latn_ita-Latn)": 68.92, - "STS22 (fra-Latn)": 76.77, - "STS22 (cmn-Hans_eng-Latn)": 65.95, - "STS22 (fra-Latn_pol-Latn)": 50.71, - "STS22 (spa-Latn_eng-Latn)": 72.51, - "STS22 (spa-Latn)": 64.6, - "STS22 (deu-Latn_pol-Latn)": 49.58, - "STSB (cmn-Hans)": 81.08, - "STSBenchmark": 87.29, - "STSBenchmarkMultilingualSTS (pol-Latn)": 81.06, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 83.05, - "STSBenchmarkMultilingualSTS (deu-Latn)": 84.27, - "STSBenchmarkMultilingualSTS (por-Latn)": 73.31, - "STSBenchmarkMultilingualSTS (en)": 87.29, - "STSBenchmarkMultilingualSTS (spa-Latn)": 83.81, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.22, - "STSBenchmarkMultilingualSTS (ita-Latn)": 81.75, - "STSBenchmarkMultilingualSTS (nld-Latn)": 81.63, - "STSBenchmarkMultilingualSTS (fra-Latn)": 83.28 - }, - { - "Model": "multilingual-e5-large", - "AFQMC": 33.02, - "ATEC": 39.81, - "BQ": 46.44, - "CDSC-R": 91.0, - "LCQMC": 75.95, - "PAWSX": 14.63, - "QBQTC": 29.77, - "SICK-R-PL": 75.08, - "SICKFr": 78.78, - "STS22 (zh)": 65.64, - "STS22 (pl)": 34.66, - "STSB": 81.08, - "STSBenchmarkMultilingualSTS (fr)": 82.53 + "Model": "voyage-multilingual-2", + "SICKFr": 74.9, + "STS22": 82.76, + "STSBenchmarkMultilingualSTS": 82.72 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "multilingual-e5-large", - "SummEval": 29.64, - "SummEvalFr (fra-Latn)": 30.92 - }, - { - "Model": "multilingual-e5-large", - "SummEval": 29.64, - "SummEvalFr (fra-Latn)": 30.92 - }, - { - "Model": "multilingual-e5-large", - "SummEvalFr": 30.92 + "Model": "voyage-multilingual-2", + "SummEvalFr": 29.96 } ] }, "MultilabelClassification": { - "accuracy": [ - { - "Model": "multilingual-e5-large", - "CEDRClassification (rus-Cyrl)": 44.84, - "SensitiveTopicsClassification (rus-Cyrl)": 27.17 - } - ] + "accuracy": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "multilingual-e5-large", - "Core17InstructionRetrieval": -1.62, - "News21InstructionRetrieval": -0.06, - "Robust04InstructionRetrieval": -7.48 - } - ] + "p-MRR": [] } }, - "nomic-embed-text-v1.5-128": { + "xlm-roberta-base": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "xlm-roberta-base", + "BornholmBitextMining": 4.42 + } + ] }, "Classification": { "accuracy": [ { - "Model": "nomic-embed-text-v1.5-128", - "AmazonCounterfactualClassification (en)": 69.78, - "AmazonPolarityClassification": 88.74, - "AmazonReviewsClassification (en)": 43.11, - "Banking77Classification": 82.78, - "EmotionClassification": 42.92, - "ImdbClassification": 80.87, - "MTOPDomainClassification (en)": 89.61, - "MTOPIntentClassification (en)": 68.9, - "MassiveIntentClassification (en)": 69.34, - "MassiveScenarioClassification (en)": 74.21, - "ToxicConversationsClassification": 68.16, - "TweetSentimentExtractionClassification": 57.99 + "Model": "xlm-roberta-base", + "AmazonReviewsClassification": 26.75, + "AngryTweetsClassification": 52.41, + "DKHateClassification": 56.78, + "DanishPoliticalCommentsClassification": 34.03, + "LccSentimentClassification": 52.27, + "MTOPDomainClassification": 43.83, + "MTOPIntentClassification": 19.38, + "MasakhaNEWSClassification": 60.5, + "MassiveIntentClassification": 13.58, + "MassiveScenarioClassification": 23.21, + "NoRecClassification": 46.28, + "NordicLangClassification": 79.39, + "NorwegianParliament": 56.75, + "ScalaDaClassification": 57.3, + "ScalaNbClassification": 58.33 } ] }, "Clustering": { "v_measure": [ { - "Model": "nomic-embed-text-v1.5-128", - "ArxivClusteringP2P": 43.87, - "ArxivClusteringS2S": 34.57, - "BiorxivClusteringP2P": 36.79, - "BiorxivClusteringS2S": 30.68, - "MedrxivClusteringP2P": 34.09, - "MedrxivClusteringS2S": 31.3, - "RedditClustering": 53.31, - "RedditClusteringP2P": 58.96, - "StackExchangeClustering": 59.92, - "StackExchangeClusteringP2P": 33.88, - "TwentyNewsgroupsClustering": 47.29 + "Model": "xlm-roberta-base", + "AlloProfClusteringP2P": 52.24, + "AlloProfClusteringS2S": 20.37, + "HALClusteringS2S": 8.68, + "MLSUMClusteringP2P": 40.44, + "MLSUMClusteringS2S": 24.14, + "MasakhaNEWSClusteringP2P": 29.29, + "MasakhaNEWSClusteringS2S": 23.76 } ] }, "PairClassification": { "max_ap": [ { - "Model": "nomic-embed-text-v1.5-128", - "SprintDuplicateQuestions": 91.45, - "TwitterSemEval2015": 73.23, - "TwitterURLCorpus": 85.93 + "Model": "xlm-roberta-base", + "OpusparcusPC": 85.45, + "PawsXPairClassification": 51.35 }, { - "Model": "nomic-embed-text-v1.5-128", - "SprintDuplicateQuestions": 91.45, - "TwitterSemEval2015": 73.23, - "TwitterURLCorpus": 85.93 + "Model": "xlm-roberta-base", + "OpusparcusPC": 85.91, + "PawsXPairClassification": 51.73 } ] }, "Reranking": { "map": [ { - "Model": "nomic-embed-text-v1.5-128", - "AskUbuntuDupQuestions": 61.16, - "MindSmallReranking": 30.02, - "SciDocsRR": 78.05, - "StackOverflowDupQuestions": 49.0 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "nomic-embed-text-v1.5-128", - "ArguAna": 43.4, - "CQADupstackRetrieval": 34.67, - "ClimateFEVER": 36.52, - "DBPedia": 36.22, - "FEVER": 80.48, - "FiQA2018": 32.08, - "HotpotQA": 60.09, - "MSMARCO": 39.99, - "NFCorpus": 30.72, - "NQ": 53.62, - "QuoraRetrieval": 87.07, - "SCIDOCS": 15.56, - "SciFact": 64.28, - "TRECCOVID": 74.58, - "Touche2020": 26.99 + "Model": "xlm-roberta-base", + "AlloprofReranking": 25.58, + "SyntecReranking": 43.75 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "xlm-roberta-base", + "AlloprofRetrieval": 0.16, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 0.88, + "SyntecRetrieval": 3.33, + "XPQARetrieval": 11.65 } ] }, "STS": { "cosine_spearman": [ { - "Model": "nomic-embed-text-v1.5-128", - "BIOSSES": 80.19, - "SICK-R": 79.09, - "STS12": 77.49, - "STS13": 85.62, - "STS14": 80.5, - "STS15": 85.84, - "STS16": 83.9, - "STS17 (en-en)": 86.27, - "STS22 (en)": 64.24, - "STSBenchmark": 84.28 + "Model": "xlm-roberta-base", + "SICKFr": 48.62, + "STS22": 56.72, + "STSBenchmarkMultilingualSTS": 46.23 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "nomic-embed-text-v1.5-128", - "SummEval": 29.59 + "Model": "xlm-roberta-base", + "SummEvalFr": 29.14 } ] }, @@ -24586,736 +22379,94 @@ "p-MRR": [] } }, - "all-mpnet-base-v2": { + "xlm-roberta-large": { "BitextMining": { - "f1": [ - { - "Model": "all-mpnet-base-v2", - "BornholmBitextMining (dan-Latn)": 27.44, - "Tatoeba (pol-Latn_eng-Latn)": 4.09, - "Tatoeba (ita-Latn_eng-Latn)": 11.1, - "Tatoeba (cat-Latn_eng-Latn)": 9.44, - "Tatoeba (aze-Latn_eng-Latn)": 1.49, - "Tatoeba (eus-Latn_eng-Latn)": 3.94, - "Tatoeba (epo-Latn_eng-Latn)": 7.15, - "Tatoeba (lit-Latn_eng-Latn)": 1.02, - "Tatoeba (ast-Latn_eng-Latn)": 9.78, - "Tatoeba (bul-Cyrl_eng-Latn)": 0.35, - "Tatoeba (ceb-Latn_eng-Latn)": 4.41, - "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, - "Tatoeba (tzl-Latn_eng-Latn)": 3.55, - "Tatoeba (zsm-Latn_eng-Latn)": 4.75, - "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17, - "Tatoeba (pam-Latn_eng-Latn)": 4.32, - "Tatoeba (amh-Ethi_eng-Latn)": 0.0, - "Tatoeba (slv-Latn_eng-Latn)": 3.73, - "Tatoeba (lvs-Latn_eng-Latn)": 2.98, - "Tatoeba (sqi-Latn_eng-Latn)": 3.45, - "Tatoeba (orv-Cyrl_eng-Latn)": 0.0, - "Tatoeba (vie-Latn_eng-Latn)": 4.96, - "Tatoeba (pes-Arab_eng-Latn)": 0.2, - "Tatoeba (por-Latn_eng-Latn)": 10.48, - "Tatoeba (dtp-Latn_eng-Latn)": 3.54, - "Tatoeba (yid-Hebr_eng-Latn)": 0.08, - "Tatoeba (isl-Latn_eng-Latn)": 3.86, - "Tatoeba (cha-Latn_eng-Latn)": 12.2, - "Tatoeba (ron-Latn_eng-Latn)": 7.34, - "Tatoeba (hye-Armn_eng-Latn)": 0.14, - "Tatoeba (mar-Deva_eng-Latn)": 0.11, - "Tatoeba (hin-Deva_eng-Latn)": 0.02, - "Tatoeba (kor-Hang_eng-Latn)": 0.32, - "Tatoeba (srp-Cyrl_eng-Latn)": 1.89, - "Tatoeba (csb-Latn_eng-Latn)": 4.19, - "Tatoeba (jpn-Jpan_eng-Latn)": 1.71, - "Tatoeba (ber-Tfng_eng-Latn)": 4.56, - "Tatoeba (wuu-Hans_eng-Latn)": 0.91, - "Tatoeba (jav-Latn_eng-Latn)": 3.17, - "Tatoeba (nob-Latn_eng-Latn)": 4.37, - "Tatoeba (bre-Latn_eng-Latn)": 3.65, - "Tatoeba (kzj-Latn_eng-Latn)": 3.62, - "Tatoeba (urd-Arab_eng-Latn)": 0.0, - "Tatoeba (ces-Latn_eng-Latn)": 3.56, - "Tatoeba (cbk-Latn_eng-Latn)": 9.33, - "Tatoeba (gla-Latn_eng-Latn)": 2.04, - "Tatoeba (war-Latn_eng-Latn)": 5.14, - "Tatoeba (swh-Latn_eng-Latn)": 6.01, - "Tatoeba (swg-Latn_eng-Latn)": 7.86, - "Tatoeba (glg-Latn_eng-Latn)": 12.0, - "Tatoeba (fao-Latn_eng-Latn)": 7.08, - "Tatoeba (gsw-Latn_eng-Latn)": 10.67, - "Tatoeba (rus-Cyrl_eng-Latn)": 0.14, - "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52, - "Tatoeba (gle-Latn_eng-Latn)": 2.19, - "Tatoeba (slk-Latn_eng-Latn)": 3.4, - "Tatoeba (nno-Latn_eng-Latn)": 5.75, - "Tatoeba (cor-Latn_eng-Latn)": 2.42, - "Tatoeba (nov-Latn_eng-Latn)": 16.61, - "Tatoeba (swe-Latn_eng-Latn)": 6.55, - "Tatoeba (max-Deva_eng-Latn)": 6.46, - "Tatoeba (oci-Latn_eng-Latn)": 8.57, - "Tatoeba (lfn-Latn_eng-Latn)": 6.1, - "Tatoeba (fra-Latn_eng-Latn)": 16.9, - "Tatoeba (ben-Beng_eng-Latn)": 0.0, - "Tatoeba (bel-Cyrl_eng-Latn)": 0.65, - "Tatoeba (lat-Latn_eng-Latn)": 5.78, - "Tatoeba (cmn-Hans_eng-Latn)": 2.22, - "Tatoeba (kat-Geor_eng-Latn)": 0.43, - "Tatoeba (bos-Latn_eng-Latn)": 4.6, - "Tatoeba (xho-Latn_eng-Latn)": 3.3, - "Tatoeba (tha-Thai_eng-Latn)": 0.0, - "Tatoeba (cym-Latn_eng-Latn)": 4.88, - "Tatoeba (deu-Latn_eng-Latn)": 11.46, - "Tatoeba (awa-Deva_eng-Latn)": 0.44, - "Tatoeba (ido-Latn_eng-Latn)": 9.84, - "Tatoeba (tat-Cyrl_eng-Latn)": 0.24, - "Tatoeba (kab-Latn_eng-Latn)": 1.31, - "Tatoeba (uzb-Latn_eng-Latn)": 1.98, - "Tatoeba (heb-Hebr_eng-Latn)": 0.28, - "Tatoeba (ara-Arab_eng-Latn)": 0.1, - "Tatoeba (fry-Latn_eng-Latn)": 12.43, - "Tatoeba (afr-Latn_eng-Latn)": 6.08, - "Tatoeba (kur-Latn_eng-Latn)": 3.65, - "Tatoeba (pms-Latn_eng-Latn)": 7.63, - "Tatoeba (ell-Grek_eng-Latn)": 0.0, - "Tatoeba (spa-Latn_eng-Latn)": 10.12, - "Tatoeba (dsb-Latn_eng-Latn)": 2.96, - "Tatoeba (uig-Arab_eng-Latn)": 0.33, - "Tatoeba (nld-Latn_eng-Latn)": 9.29, - "Tatoeba (tel-Telu_eng-Latn)": 0.73, - "Tatoeba (hrv-Latn_eng-Latn)": 3.77, - "Tatoeba (nds-Latn_eng-Latn)": 10.96, - "Tatoeba (hun-Latn_eng-Latn)": 3.23, - "Tatoeba (est-Latn_eng-Latn)": 2.35, - "Tatoeba (mal-Mlym_eng-Latn)": 0.15, - "Tatoeba (khm-Khmr_eng-Latn)": 0.28, - "Tatoeba (hsb-Latn_eng-Latn)": 3.12, - "Tatoeba (tgl-Latn_eng-Latn)": 4.06, - "Tatoeba (ang-Latn_eng-Latn)": 9.77, - "Tatoeba (tur-Latn_eng-Latn)": 3.16, - "Tatoeba (tuk-Latn_eng-Latn)": 2.23, - "Tatoeba (ile-Latn_eng-Latn)": 17.84, - "Tatoeba (mon-Cyrl_eng-Latn)": 0.81, - "Tatoeba (yue-Hant_eng-Latn)": 1.16, - "Tatoeba (ina-Latn_eng-Latn)": 22.55, - "Tatoeba (tam-Taml_eng-Latn)": 0.73, - "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5, - "Tatoeba (dan-Latn_eng-Latn)": 10.01, - "Tatoeba (arq-Arab_eng-Latn)": 0.33, - "Tatoeba (arz-Arab_eng-Latn)": 0.0, - "Tatoeba (fin-Latn_eng-Latn)": 3.82, - "Tatoeba (ind-Latn_eng-Latn)": 4.88 - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "all-mpnet-base-v2", - "AllegroReviews (pol-Latn)": 22.99, - "AmazonCounterfactualClassification (en-ext)": 67.5, - "AmazonCounterfactualClassification (en)": 65.03, - "AmazonCounterfactualClassification (deu-Latn)": 55.66, - "AmazonCounterfactualClassification (jpn-Jpan)": 60.69, - "AmazonPolarityClassification": 67.14, - "AmazonReviewsClassification (en)": 31.44, - "AmazonReviewsClassification (deu-Latn)": 26.05, - "AmazonReviewsClassification (spa-Latn)": 27.73, - "AmazonReviewsClassification (fra-Latn)": 28.49, - "AmazonReviewsClassification (jpn-Jpan)": 23.65, - "AmazonReviewsClassification (cmn-Hans)": 23.62, - "AngryTweetsClassification (dan-Latn)": 44.13, - "Banking77Classification": 81.7, - "CBD (pol-Latn)": 50.25, - "DanishPoliticalCommentsClassification (dan-Latn)": 28.31, - "EmotionClassification": 42.22, - "GeoreviewClassification (rus-Cyrl)": 25.93, - "HeadlineClassification (rus-Cyrl)": 28.53, - "IFlyTek (cmn-Hans)": 17.18, - "ImdbClassification": 71.17, - "InappropriatenessClassification (rus-Cyrl)": 51.82, - "JDReview (cmn-Hans)": 60.19, - "KinopoiskClassification (rus-Cyrl)": 34.18, - "LccSentimentClassification (dan-Latn)": 39.27, - "MTOPDomainClassification (en)": 91.89, - "MTOPDomainClassification (deu-Latn)": 71.86, - "MTOPDomainClassification (spa-Latn)": 71.3, - "MTOPDomainClassification (fra-Latn)": 74.88, - "MTOPDomainClassification (hin-Deva)": 39.93, - "MTOPDomainClassification (tha-Thai)": 17.54, - "MTOPIntentClassification (en)": 68.27, - "MTOPIntentClassification (deu-Latn)": 44.36, - "MTOPIntentClassification (spa-Latn)": 39.48, - "MTOPIntentClassification (fra-Latn)": 37.57, - "MTOPIntentClassification (hin-Deva)": 18.63, - "MTOPIntentClassification (tha-Thai)": 5.39, - "MasakhaNEWSClassification (amh-Ethi)": 36.49, - "MasakhaNEWSClassification (eng)": 79.75, - "MasakhaNEWSClassification (fra-Latn)": 77.77, - "MasakhaNEWSClassification (hau-Latn)": 59.22, - "MasakhaNEWSClassification (ibo-Latn)": 61.64, - "MasakhaNEWSClassification (lin-Latn)": 74.0, - "MasakhaNEWSClassification (lug-Latn)": 58.43, - "MasakhaNEWSClassification (orm-Ethi)": 48.15, - "MasakhaNEWSClassification (pcm-Latn)": 92.2, - "MasakhaNEWSClassification (run-Latn)": 64.72, - "MasakhaNEWSClassification (sna-Latn)": 73.69, - "MasakhaNEWSClassification (som-Latn)": 49.97, - "MasakhaNEWSClassification (swa-Latn)": 55.15, - "MasakhaNEWSClassification (tir-Ethi)": 27.46, - "MasakhaNEWSClassification (xho-Latn)": 60.98, - "MasakhaNEWSClassification (yor-Latn)": 63.33, - "MassiveIntentClassification (en)": 69.76, - "MassiveIntentClassification (ara-Arab)": 20.42, - "MassiveIntentClassification (isl-Latn)": 31.46, - "MassiveIntentClassification (rus-Cyrl)": 23.98, - "MassiveIntentClassification (hun-Latn)": 34.38, - "MassiveIntentClassification (pol-Latn)": 34.26, - "MassiveIntentClassification (lav-Latn)": 35.08, - "MassiveIntentClassification (msa-Latn)": 30.53, - "MassiveIntentClassification (ind-Latn)": 36.31, - "MassiveIntentClassification (kan-Knda)": 3.76, - "MassiveIntentClassification (tam-Taml)": 9.25, - "MassiveIntentClassification (ron-Latn)": 38.07, - "MassiveIntentClassification (por-Latn)": 42.83, - "MassiveIntentClassification (jpn-Jpan)": 33.13, - "MassiveIntentClassification (tgl-Latn)": 36.33, - "MassiveIntentClassification (amh-Ethi)": 2.4, - "MassiveIntentClassification (fin-Latn)": 34.58, - "MassiveIntentClassification (hye-Armn)": 10.11, - "MassiveIntentClassification (nld-Latn)": 38.49, - "MassiveIntentClassification (tur-Latn)": 32.02, - "MassiveIntentClassification (urd-Arab)": 12.86, - "MassiveIntentClassification (cym-Latn)": 30.82, - "MassiveIntentClassification (fra-Latn)": 44.27, - "MassiveIntentClassification (aze-Latn)": 28.92, - "MassiveIntentClassification (ben-Beng)": 12.35, - "MassiveIntentClassification (mon-Cyrl)": 19.65, - "MassiveIntentClassification (ita-Latn)": 40.29, - "MassiveIntentClassification (tel-Telu)": 2.26, - "MassiveIntentClassification (kat-Geor)": 7.66, - "MassiveIntentClassification (hin-Deva)": 17.68, - "MassiveIntentClassification (fas-Arab)": 22.45, - "MassiveIntentClassification (swe-Latn)": 39.02, - "MassiveIntentClassification (heb-Hebr)": 23.6, - "MassiveIntentClassification (sqi-Latn)": 37.26, - "MassiveIntentClassification (mal-Mlym)": 2.62, - "MassiveIntentClassification (vie-Latn)": 31.47, - "MassiveIntentClassification (mya-Mymr)": 4.6, - "MassiveIntentClassification (jav-Latn)": 31.75, - "MassiveIntentClassification (cmo-Hans)": 24.36, - "MassiveIntentClassification (swa-Latn)": 31.82, - "MassiveIntentClassification (nob-Latn)": 39.3, - "MassiveIntentClassification (cmo-Hant)": 22.43, - "MassiveIntentClassification (ell-Grek)": 24.52, - "MassiveIntentClassification (deu-Latn)": 44.54, - "MassiveIntentClassification (tha-Thai)": 8.51, - "MassiveIntentClassification (dan-Latn)": 42.36, - "MassiveIntentClassification (afr-Latn)": 36.49, - "MassiveIntentClassification (spa-Latn)": 39.75, - "MassiveIntentClassification (kor-Kore)": 13.35, - "MassiveIntentClassification (slv-Latn)": 34.49, - "MassiveIntentClassification (khm-Khmr)": 4.76, - "MassiveScenarioClassification (en)": 75.67, - "MassiveScenarioClassification (kor-Kore)": 17.28, - "MassiveScenarioClassification (swe-Latn)": 44.53, - "MassiveScenarioClassification (hye-Armn)": 16.86, - "MassiveScenarioClassification (nob-Latn)": 45.75, - "MassiveScenarioClassification (pol-Latn)": 42.66, - "MassiveScenarioClassification (ind-Latn)": 43.05, - "MassiveScenarioClassification (ita-Latn)": 51.37, - "MassiveScenarioClassification (tgl-Latn)": 47.04, - "MassiveScenarioClassification (jav-Latn)": 40.0, - "MassiveScenarioClassification (lav-Latn)": 39.28, - "MassiveScenarioClassification (mya-Mymr)": 10.8, - "MassiveScenarioClassification (por-Latn)": 52.06, - "MassiveScenarioClassification (tel-Telu)": 7.81, - "MassiveScenarioClassification (deu-Latn)": 54.09, - "MassiveScenarioClassification (fas-Arab)": 27.8, - "MassiveScenarioClassification (hin-Deva)": 23.13, - "MassiveScenarioClassification (hun-Latn)": 41.01, - "MassiveScenarioClassification (vie-Latn)": 35.9, - "MassiveScenarioClassification (fra-Latn)": 54.26, - "MassiveScenarioClassification (jpn-Jpan)": 40.57, - "MassiveScenarioClassification (tha-Thai)": 17.01, - "MassiveScenarioClassification (swa-Latn)": 40.34, - "MassiveScenarioClassification (ell-Grek)": 33.85, - "MassiveScenarioClassification (aze-Latn)": 36.42, - "MassiveScenarioClassification (heb-Hebr)": 25.49, - "MassiveScenarioClassification (kat-Geor)": 13.45, - "MassiveScenarioClassification (afr-Latn)": 43.63, - "MassiveScenarioClassification (ben-Beng)": 17.49, - "MassiveScenarioClassification (cym-Latn)": 34.82, - "MassiveScenarioClassification (mon-Cyrl)": 25.58, - "MassiveScenarioClassification (tur-Latn)": 39.11, - "MassiveScenarioClassification (tam-Taml)": 14.55, - "MassiveScenarioClassification (ara-Arab)": 27.8, - "MassiveScenarioClassification (msa-Latn)": 37.28, - "MassiveScenarioClassification (cmo-Hant)": 31.7, - "MassiveScenarioClassification (dan-Latn)": 49.45, - "MassiveScenarioClassification (kan-Knda)": 8.34, - "MassiveScenarioClassification (urd-Arab)": 20.0, - "MassiveScenarioClassification (cmo-Hans)": 35.33, - "MassiveScenarioClassification (amh-Ethi)": 7.43, - "MassiveScenarioClassification (ron-Latn)": 47.86, - "MassiveScenarioClassification (fin-Latn)": 38.41, - "MassiveScenarioClassification (isl-Latn)": 39.36, - "MassiveScenarioClassification (sqi-Latn)": 44.67, - "MassiveScenarioClassification (spa-Latn)": 50.92, - "MassiveScenarioClassification (mal-Mlym)": 7.69, - "MassiveScenarioClassification (slv-Latn)": 39.88, - "MassiveScenarioClassification (nld-Latn)": 47.79, - "MassiveScenarioClassification (khm-Khmr)": 9.63, - "MassiveScenarioClassification (rus-Cyrl)": 28.71, - "MultilingualSentiment (cmn-Hans)": 41.2, - "NoRecClassification (nob-Latn)": 38.34, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15, - "OnlineShopping (cmn-Hans)": 56.94, - "PAC (pol-Latn)": 62.1, - "PolEmo2.0-IN (pol-Latn)": 41.63, - "PolEmo2.0-OUT (pol-Latn)": 25.0, - "RuReviewsClassification (rus-Cyrl)": 42.33, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 13.29, - "RuSciBenchOECDClassification (rus-Cyrl)": 10.62, - "TNews (cmn-Hans)": 21.05, - "ToxicConversationsClassification": 61.05, - "TweetSentimentExtractionClassification": 55.05, - "Waimai (cmn-Hans)": 63.31 + "Model": "xlm-roberta-large", + "AmazonReviewsClassification": 26.62, + "MTOPDomainClassification": 36.77, + "MTOPIntentClassification": 15.37, + "MasakhaNEWSClassification": 65.76, + "MassiveIntentClassification": 15.82, + "MassiveScenarioClassification": 23.92 } ] }, "Clustering": { "v_measure": [ { - "Model": "all-mpnet-base-v2", - "ArxivClusteringP2P": 48.38, - "ArxivClusteringS2S": 39.72, - "BiorxivClusteringP2P": 39.62, - "BiorxivClusteringS2S": 35.02, - "GeoreviewClusteringP2P (rus-Cyrl)": 20.33, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49, - "MasakhaNEWSClusteringP2P (eng)": 67.24, - "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99, - "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0, - "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68, - "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01, - "MasakhaNEWSClusteringP2P (run-Latn)": 57.6, - "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99, - "MasakhaNEWSClusteringP2P (som-Latn)": 31.16, - "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85, - "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24, - "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48, - "MasakhaNEWSClusteringS2S (eng)": 35.69, - "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05, - "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63, - "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72, - "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7, - "MasakhaNEWSClusteringS2S (run-Latn)": 52.27, - "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64, - "MasakhaNEWSClusteringS2S (som-Latn)": 30.94, - "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01, - "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16, - "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04, - "MedrxivClusteringP2P": 35.58, - "MedrxivClusteringS2S": 32.87, - "RedditClustering": 54.82, - "RedditClusteringP2P": 56.77, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 14.66, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 12.49, - "StackExchangeClustering": 53.8, - "StackExchangeClusteringP2P": 34.28, - "TwentyNewsgroupsClustering": 49.74 + "Model": "xlm-roberta-large", + "AlloProfClusteringP2P": 56.54, + "AlloProfClusteringS2S": 21.18, + "BlurbsClusteringP2P": 29.84, + "BlurbsClusteringS2S": 7.29, + "HALClusteringS2S": 5.94, + "MLSUMClusteringP2P": 42.67, + "MLSUMClusteringS2S": 18.5, + "MasakhaNEWSClusteringP2P": 34.02, + "MasakhaNEWSClusteringS2S": 21.52, + "TenKGnadClusteringP2P": 32.46, + "TenKGnadClusteringS2S": 6.16 } ] }, "PairClassification": { "max_ap": [ { - "Model": "all-mpnet-base-v2", - "CDSC-E (pol-Latn)": 45.37, - "OpusparcusPC (deu-Latn)": 89.78, - "OpusparcusPC (en)": 97.75, - "OpusparcusPC (fin-Latn)": 85.82, - "OpusparcusPC (fra-Latn)": 86.61, - "OpusparcusPC (rus-Cyrl)": 79.85, - "OpusparcusPC (swe-Latn)": 81.81, - "PSC (pol-Latn)": 83.28, - "PawsXPairClassification (deu-Latn)": 52.17, - "PawsXPairClassification (en)": 61.99, - "PawsXPairClassification (spa-Latn)": 55.06, - "PawsXPairClassification (fra-Latn)": 56.42, - "PawsXPairClassification (jpn-Hira)": 47.43, - "PawsXPairClassification (kor-Hang)": 49.75, - "PawsXPairClassification (cmn-Hans)": 52.47, - "SICK-E-PL (pol-Latn)": 46.51, - "SprintDuplicateQuestions": 90.15, - "TERRa (rus-Cyrl)": 44.52, - "TwitterSemEval2015": 73.85, - "TwitterURLCorpus": 85.11 + "Model": "xlm-roberta-large", + "OpusparcusPC": 83.73, + "PawsXPairClassification": 53.38 }, { - "Model": "all-mpnet-base-v2", - "CDSC-E (pol-Latn)": 45.37, - "OpusparcusPC (deu-Latn)": 89.78, - "OpusparcusPC (en)": 97.75, - "OpusparcusPC (fin-Latn)": 85.82, - "OpusparcusPC (fra-Latn)": 86.61, - "OpusparcusPC (rus-Cyrl)": 79.93, - "OpusparcusPC (swe-Latn)": 81.81, - "PSC (pol-Latn)": 83.28, - "PawsXPairClassification (deu-Latn)": 52.28, - "PawsXPairClassification (en)": 61.99, - "PawsXPairClassification (spa-Latn)": 55.06, - "PawsXPairClassification (fra-Latn)": 56.42, - "PawsXPairClassification (jpn-Hira)": 47.6, - "PawsXPairClassification (kor-Hang)": 49.84, - "PawsXPairClassification (cmn-Hans)": 52.51, - "SICK-E-PL (pol-Latn)": 46.55, - "SprintDuplicateQuestions": 90.15, - "TERRa (rus-Cyrl)": 44.52, - "TwitterSemEval2015": 73.87, - "TwitterURLCorpus": 85.11 + "Model": "xlm-roberta-large", + "OpusparcusPC": 83.79, + "PawsXPairClassification": 53.44 } ] }, "Reranking": { "map": [ { - "Model": "all-mpnet-base-v2", - "AlloprofReranking (fra-Latn)": 69.63, - "AskUbuntuDupQuestions": 65.85, - "MMarcoReranking (cmn-Hans)": 4.65, - "MindSmallReranking": 30.97, - "RuBQReranking (rus-Cyrl)": 30.96, - "SciDocsRR": 88.65, - "StackOverflowDupQuestions": 51.98, - "SyntecReranking (fra-Latn)": 66.12, - "T2Reranking (cmn-Hans)": 58.3 + "Model": "xlm-roberta-large", + "AlloprofReranking": 28.62, + "SyntecReranking": 49.4 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "all-mpnet-base-v2", - "AILACasedocs": 22.51, - "AILAStatutes": 21.27, - "ARCChallenge": 11.8, - "AlloprofRetrieval (fra-Latn)": 34.27, - "AlphaNLI": 22.41, - "AppsRetrieval (eng-Latn_python-Code)": 8.41, - "ArguAna": 46.52, - "ArguAna-PL (pol-Latn)": 14.72, - "BSARDRetrieval (fra-Latn)": 6.98, - "BrightRetrieval (robotics)": 8.36, - "BrightRetrieval (psychology)": 22.63, - "BrightRetrieval (leetcode)": 26.4, - "BrightRetrieval (biology)": 15.52, - "BrightRetrieval (theoremqa_questions)": 18.49, - "BrightRetrieval (economics)": 16.64, - "BrightRetrieval (stackoverflow)": 9.48, - "BrightRetrieval (pony)": 6.95, - "BrightRetrieval (earth_science)": 20.11, - "BrightRetrieval (theoremqa_theorems)": 9.9, - "BrightRetrieval (sustainable_living)": 15.34, - "BrightRetrieval (aops)": 5.32, - "CQADupstackRetrieval": 44.96, - "ClimateFEVER": 21.97, - "CmedqaRetrieval (cmn-Hans)": 2.0, - "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 37.72, - "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 69.32, - "CodeSearchNetCCRetrieval (python-Code)": 71.83, - "CodeSearchNetCCRetrieval (javascript-Code)": 67.99, - "CodeSearchNetCCRetrieval (go-Code)": 61.44, - "CodeSearchNetCCRetrieval (ruby-Code)": 66.42, - "CodeSearchNetCCRetrieval (java-Code)": 68.88, - "CodeSearchNetCCRetrieval (php-Code)": 59.22, - "CodeSearchNetRetrieval (python-Code)": 81.01, - "CodeSearchNetRetrieval (javascript-Code)": 65.58, - "CodeSearchNetRetrieval (go-Code)": 88.25, - "CodeSearchNetRetrieval (ruby-Code)": 74.53, - "CodeSearchNetRetrieval (java-Code)": 65.11, - "CodeSearchNetRetrieval (php-Code)": 74.47, - "CodeTransOceanContest (python-Code_c++-Code)": 70.58, - "CodeTransOceanDL": 29.45, - "CosQA (eng-Latn_python-Code)": 33.71, - "CovidRetrieval (cmn-Hans)": 3.7, - "DBPedia": 32.09, - "DuRetrieval (cmn-Hans)": 4.92, - "EcomRetrieval (cmn-Hans)": 3.94, - "FEVER": 50.86, - "FiQA-PL (pol-Latn)": 3.6, - "FiQA2018": 49.96, - "GerDaLIRSmall (deu-Latn)": 3.78, - "HellaSwag": 26.27, - "HotpotQA": 39.29, - "LEMBNarrativeQARetrieval": 19.34, - "LEMBQMSumRetrieval": 21.54, - "LEMBSummScreenFDRetrieval": 60.43, - "LEMBWikimQARetrieval": 44.92, - "LeCaRDv2 (zho-Hans)": 18.09, - "LegalBenchConsumerContractsQA": 75.25, - "LegalBenchCorporateLobbying": 89.04, - "LegalQuAD (deu-Latn)": 10.67, - "LegalSummarization": 58.55, - "MMarcoRetrieval (cmn-Hans)": 7.13, - "MSMARCO": 39.75, - "MedicalRetrieval (cmn-Hans)": 1.71, - "MintakaRetrieval (ara-Arab)": 1.97, - "MintakaRetrieval (deu-Latn)": 17.21, - "MintakaRetrieval (spa-Latn)": 10.11, - "MintakaRetrieval (fra-Latn)": 12.93, - "MintakaRetrieval (hin-Deva)": 2.03, - "MintakaRetrieval (ita-Latn)": 5.63, - "MintakaRetrieval (jpn-Hira)": 6.77, - "MintakaRetrieval (por-Latn)": 8.05, - "NFCorpus": 33.29, - "NFCorpus-PL (pol-Latn)": 8.77, - "NQ": 50.45, - "PIQA": 29.03, - "Quail": 3.41, - "QuoraRetrieval": 87.46, - "RARbCode": 53.21, - "RARbMath": 71.85, - "RuBQRetrieval (rus-Cyrl)": 4.75, - "SCIDOCS": 23.76, - "SCIDOCS-PL (pol-Latn)": 4.02, - "SIQA": 2.38, - "SciFact": 65.57, - "SciFact-PL (pol-Latn)": 13.31, - "SpartQA": 0.22, - "StackOverflowQA": 90.32, - "SyntecRetrieval (fra-Latn)": 57.39, - "SyntheticText2SQL (eng-Latn_sql-Code)": 45.09, - "T2Retrieval (cmn-Hans)": 2.98, - "TRECCOVID": 51.33, - "TRECCOVID-PL (pol-Latn)": 12.11, - "TempReasonL1": 1.77, - "TempReasonL2Fact": 11.2, - "TempReasonL2Pure": 1.15, - "TempReasonL3Fact": 9.42, - "TempReasonL3Pure": 5.59, - "Touche2020": 19.93, - "VideoRetrieval (cmn-Hans)": 8.48, - "WinoGrande": 20.77, - "XPQARetrieval (ara-Arab_ara-Arab)": 9.42, - "XPQARetrieval (eng-Latn_ara-Arab)": 2.36, - "XPQARetrieval (ara-Arab_eng-Latn)": 8.98, - "XPQARetrieval (deu-Latn_deu-Latn)": 55.82, - "XPQARetrieval (eng-Latn_deu-Latn)": 11.74, - "XPQARetrieval (deu-Latn_eng-Latn)": 30.44, - "XPQARetrieval (spa-Latn_spa-Latn)": 40.01, - "XPQARetrieval (eng-Latn_spa-Latn)": 6.12, - "XPQARetrieval (spa-Latn_eng-Latn)": 29.44, - "XPQARetrieval (fra-Latn_fra-Latn)": 51.94, - "XPQARetrieval (eng-Latn_fra-Latn)": 11.48, - "XPQARetrieval (fra-Latn_eng-Latn)": 32.52, - "XPQARetrieval (hin-Deva_hin-Deva)": 37.45, - "XPQARetrieval (eng-Latn_hin-Deva)": 5.11, - "XPQARetrieval (hin-Deva_eng-Latn)": 7.37, - "XPQARetrieval (ita-Latn_ita-Latn)": 54.2, - "XPQARetrieval (eng-Latn_ita-Latn)": 6.08, - "XPQARetrieval (ita-Latn_eng-Latn)": 30.32, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.46, - "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79, - "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77, - "XPQARetrieval (kor-Hang_kor-Hang)": 10.39, - "XPQARetrieval (eng-Latn_kor-Hang)": 7.09, - "XPQARetrieval (kor-Hang_eng-Latn)": 6.96, - "XPQARetrieval (pol-Latn_pol-Latn)": 23.71, - "XPQARetrieval (eng-Latn_pol-Latn)": 8.83, - "XPQARetrieval (pol-Latn_eng-Latn)": 15.94, - "XPQARetrieval (por-Latn_por-Latn)": 33.56, - "XPQARetrieval (eng-Latn_por-Latn)": 3.76, - "XPQARetrieval (por-Latn_eng-Latn)": 23.45, - "XPQARetrieval (tam-Taml_tam-Taml)": 5.5, - "XPQARetrieval (eng-Latn_tam-Taml)": 3.3, - "XPQARetrieval (tam-Taml_eng-Latn)": 4.18, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.8, - "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2, - "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84 - }, - { - "Model": "all-mpnet-base-v2", - "LEMBNeedleRetrieval": 16.0, - "LEMBPasskeyRetrieval": 24.5 - } - ], - "recall_at_1": [ - { - "Model": "all-mpnet-base-v2", - "BrightRetrieval (biology)": 25.57, - "BrightRetrieval (sustainable_living)": 18.01, - "BrightRetrieval (psychology)": 15.84, - "BrightRetrieval (economics)": 18.93, - "BrightRetrieval (earth_science)": 34.05, - "BrightRetrieval (stackoverflow)": 14.96, - "BrightRetrieval (pony)": 1.19, - "BrightRetrieval (robotics)": 10.89 + "Model": "xlm-roberta-large", + "AlloprofRetrieval": 0.52, + "BSARDRetrieval": 0.0, + "MintakaRetrieval": 0.9, + "SyntecRetrieval": 6.6, + "XPQARetrieval": 12.7 } ] }, "STS": { "cosine_spearman": [ { - "Model": "all-mpnet-base-v2", - "AFQMC (cmn-Hans)": 8.01, - "ATEC (cmn-Hans)": 14.03, - "BIOSSES": 80.43, - "BQ (cmn-Hans)": 21.39, - "CDSC-R (pol-Latn)": 77.04, - "LCQMC (cmn-Hans)": 22.84, - "PAWSX (cmn-Hans)": 6.44, - "RUParaPhraserSTS (rus-Cyrl)": 42.15, - "RuSTSBenchmarkSTS (rus-Cyrl)": 55.68, - "SICK-R": 80.59, - "SICK-R-PL (pol-Latn)": 50.2, - "SICKFr (fra-Latn)": 67.05, - "STS12": 72.63, - "STS13": 83.48, - "STS14": 78.0, - "STS15": 85.66, - "STS16": 80.03, - "STS17 (fra-Latn_eng-Latn)": 41.64, - "STS17 (nld-Latn_eng-Latn)": 32.89, - "STS17 (spa-Latn_eng-Latn)": 25.28, - "STS17 (en-en)": 90.6, - "STS17 (kor-Hang)": 39.11, - "STS17 (ara-Arab)": 55.42, - "STS17 (spa-Latn)": 78.4, - "STS17 (eng-Latn_deu-Latn)": 35.5, - "STS17 (eng-Latn_ara-Arab)": 6.76, - "STS17 (eng-Latn_tur-Latn)": -4.58, - "STS17 (ita-Latn_eng-Latn)": 31.8, - "STS22 (pol-Latn)": 24.21, - "STS22 (ita-Latn)": 58.02, - "STS22 (spa-Latn_eng-Latn)": 55.09, - "STS22 (fra-Latn)": 77.1, - "STS22 (tur-Latn)": 29.35, - "STS22 (cmn-Hans)": 42.24, - "STS22 (deu-Latn)": 27.0, - "STS22 (spa-Latn_ita-Latn)": 41.61, - "STS22 (fra-Latn_pol-Latn)": 73.25, - "STS22 (deu-Latn_eng-Latn)": 49.73, - "STS22 (cmn-Hans_eng-Latn)": 40.47, - "STS22 (spa-Latn)": 55.98, - "STS22 (ara-Arab)": 38.96, - "STS22 (en)": 68.39, - "STS22 (deu-Latn_pol-Latn)": 23.53, - "STS22 (rus-Cyrl)": 15.83, - "STS22 (pol-Latn_eng-Latn)": 51.07, - "STS22 (deu-Latn_fra-Latn)": 31.39, - "STSB (cmn-Hans)": 37.7, - "STSBenchmark": 83.42, - "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54, - "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15, - "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72, - "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78, - "STSBenchmarkMultilingualSTS (en)": 83.42, - "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43, - "STSBenchmarkMultilingualSTS (por-Latn)": 62.12, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43, - "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36 - }, - { - "Model": "all-mpnet-base-v2", - "STS17 (en-en)": 90.6, - "STS22 (en)": 67.95 + "Model": "xlm-roberta-large", + "SICKFr": 50.01, + "STS22": 55.49, + "STSBenchmarkMultilingualSTS": 42.32 } ] }, "Summarization": { "cosine_spearman": [ { - "Model": "all-mpnet-base-v2", - "SummEval": 27.49, - "SummEvalFr (fra-Latn)": 28.11 - }, - { - "Model": "all-mpnet-base-v2", - "SummEval": 27.49, - "SummEvalFr (fra-Latn)": 28.11 - } - ] - }, - "MultilabelClassification": { - "accuracy": [ - { - "Model": "all-mpnet-base-v2", - "CEDRClassification (rus-Cyrl)": 35.98, - "SensitiveTopicsClassification (rus-Cyrl)": 17.83 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "all-mpnet-base-v2", - "Core17InstructionRetrieval": -0.74, - "News21InstructionRetrieval": -1.79, - "Robust04InstructionRetrieval": -6.71 - } - ] - } - }, - "e5-large": { - "BitextMining": { - "f1": [ - { - "Model": "e5-large", - "BornholmBitextMining": 40.15 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "e5-large", - "AngryTweetsClassification": 46.14, - "DKHateClassification": 58.72, - "DanishPoliticalCommentsClassification": 28.67, - "LccSentimentClassification": 42.13, - "MassiveIntentClassification (da)": 42.29, - "MassiveIntentClassification (nb)": 40.63, - "MassiveIntentClassification (sv)": 40.69, - "MassiveScenarioClassification (da)": 52.95, - "MassiveScenarioClassification (nb)": 51.91, - "MassiveScenarioClassification (sv)": 50.97, - "NoRecClassification": 41.83, - "NordicLangClassification": 58.3, - "NorwegianParliament": 57.26, - "ScalaDaClassification": 49.9, - "ScalaNbClassification": 50.13 + "Model": "xlm-roberta-large", + "SummEvalFr": 28.89 } ] }, - "Clustering": { - "v_measure": [] - }, - "PairClassification": { - "max_ap": [] - }, - "Reranking": { - "map": [] - }, - "Retrieval": { - "ndcg_at_10": [] - }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { "accuracy": [] },