diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json" --- "a/EXTERNAL_MODEL_RESULTS.json" +++ "b/EXTERNAL_MODEL_RESULTS.json" @@ -1457,173 +1457,6 @@ ] } }, - "gte-Qwen1.5-7B-instruct": { - "BitextMining": { - "f1": [ - { - "Model": "gte-Qwen1.5-7B-instruct" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "AmazonCounterfactualClassification (en)": 83.16, - "AmazonPolarityClassification": 96.7, - "AmazonReviewsClassification (en)": 62.17, - "AmazonReviewsClassification (zh)": 52.95, - "Banking77Classification": 81.68, - "EmotionClassification": 54.53, - "IFlyTek": 53.77, - "ImdbClassification": 95.58, - "JDReview": 88.2, - "MTOPDomainClassification (en)": 95.75, - "MTOPIntentClassification (en)": 84.26, - "MassiveIntentClassification (zh-CN)": 76.25, - "MassiveIntentClassification (en)": 78.47, - "MassiveScenarioClassification (en)": 78.19, - "MassiveScenarioClassification (zh-CN)": 77.26, - "MultilingualSentiment": 77.42, - "OnlineShopping": 94.48, - "TNews": 51.24, - "ToxicConversationsClassification": 78.75, - "TweetSentimentExtractionClassification": 66.0, - "Waimai": 88.63 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "ArxivClusteringP2P": 56.4, - "ArxivClusteringS2S": 51.45, - "BiorxivClusteringP2P": 49.01, - "BiorxivClusteringS2S": 45.06, - "CLSClusteringP2P": 47.21, - "CLSClusteringS2S": 45.79, - "MedrxivClusteringP2P": 44.37, - "MedrxivClusteringS2S": 42.0, - "RedditClustering": 73.37, - "RedditClusteringP2P": 72.51, - "StackExchangeClustering": 79.07, - "StackExchangeClusteringP2P": 49.57, - "ThuNewsClusteringP2P": 87.43, - "ThuNewsClusteringS2S": 87.9, - "TwentyNewsgroupsClustering": 51.31 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "Cmnli": 91.81, - "Ocnli": 85.22, - "SprintDuplicateQuestions": 95.99, - "TwitterSemEval2015": 79.36, - "TwitterURLCorpus": 86.79 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "AskUbuntuDupQuestions": 66.0, - "CMedQAv1": 86.37, - "CMedQAv2": 87.41, - "MindSmallReranking": 32.71, - "SciDocsRR": 87.89, - "StackOverflowDupQuestions": 53.93, - "T2Reranking": 68.11 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "ArguAna": 62.65, - "BrightRetrieval (stackoverflow)": 19.85, - "BrightRetrieval (earth_science)": 36.22, - "BrightRetrieval (leetcode)": 25.46, - "BrightRetrieval (theoremqa_questions)": 26.97, - "BrightRetrieval (economics)": 17.72, - "BrightRetrieval (robotics)": 13.47, - "BrightRetrieval (pony)": 9.79, - "BrightRetrieval (aops)": 14.36, - "BrightRetrieval (psychology)": 24.61, - "BrightRetrieval (theoremqa_theorems)": 26.66, - "BrightRetrieval (biology)": 30.92, - "BrightRetrieval (sustainable_living)": 14.93, - "CQADupstackRetrieval": 40.64, - "ClimateFEVER": 44.0, - "CmedqaRetrieval": 43.47, - "CovidRetrieval": 80.87, - "DBPedia": 48.04, - "DuRetrieval": 86.01, - "EcomRetrieval": 66.46, - "FEVER": 93.35, - "FiQA2018": 55.31, - "HotpotQA": 72.25, - "MMarcoRetrieval": 73.83, - "MSMARCO": 41.68, - "MedicalRetrieval": 61.33, - "NFCorpus": 38.25, - "NQ": 61.79, - "QuoraRetrieval": 89.61, - "SCIDOCS": 27.69, - "SciFact": 75.31, - "T2Retrieval": 83.58, - "TRECCOVID": 72.72, - "Touche2020": 20.3, - "VideoRetrieval": 69.41 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "AFQMC": 58.47, - "ATEC": 55.46, - "BIOSSES": 81.12, - "BQ": 77.59, - "LCQMC": 76.29, - "PAWSX": 50.22, - 
"QBQTC": 31.82, - "SICK-R": 79.15, - "STS12": 76.52, - "STS13": 88.63, - "STS14": 83.32, - "STS15": 87.5, - "STS16": 86.39, - "STS17 (en-en)": 87.79, - "STS22 (en)": 66.4, - "STS22 (zh)": 67.36, - "STSB": 81.37, - "STSBenchmark": 87.35 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "gte-Qwen1.5-7B-instruct", - "SummEval": 31.46 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "gte-Qwen1.5-7B-instruct" - } - ] - } - }, "text-similarity-curie-001": { "BitextMining": { "f1": [ @@ -2719,83 +2552,6 @@ ] } }, - "SFR-Embedding-Mistral": { - "BitextMining": { - "f1": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "SFR-Embedding-Mistral", - "BrightRetrieval (sustainable_living)": 19.79, - "BrightRetrieval (economics)": 17.84, - "BrightRetrieval (theoremqa_theorems)": 24.05, - "BrightRetrieval (aops)": 7.43, - "BrightRetrieval (theoremqa_questions)": 23.05, - "BrightRetrieval (psychology)": 18.97, - "BrightRetrieval (stackoverflow)": 12.72, - "BrightRetrieval (pony)": 1.97, - "BrightRetrieval (leetcode)": 27.35, - "BrightRetrieval (biology)": 19.49, - "BrightRetrieval (earth_science)": 26.63, - "BrightRetrieval (robotics)": 16.7 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "SFR-Embedding-Mistral" - } - ] - } - }, "glove.6B.300d": { "BitextMining": { "f1": [ @@ -4171,145 +3927,12 @@ ] } }, - "voyage-large-2-instruct": { + "rubert-base-cased": { "BitextMining": { "f1": [ { - "Model": "voyage-large-2-instruct" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "voyage-large-2-instruct", - "AmazonCounterfactualClassification (en)": 77.6, - "AmazonPolarityClassification": 96.58, - "AmazonReviewsClassification (en)": 50.77, - "Banking77Classification": 86.96, - "EmotionClassification": 59.81, - "ImdbClassification": 96.13, - "MTOPDomainClassification (en)": 98.86, - "MTOPIntentClassification (en)": 86.97, - "MassiveIntentClassification (en)": 81.08, - "MassiveScenarioClassification (en)": 87.95, - "ToxicConversationsClassification": 83.58, - "TweetSentimentExtractionClassification": 71.55 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "voyage-large-2-instruct", - "ArxivClusteringP2P": 51.81, - "ArxivClusteringS2S": 44.73, - "BiorxivClusteringP2P": 46.07, - "BiorxivClusteringS2S": 40.64, - "MedrxivClusteringP2P": 42.94, - "MedrxivClusteringS2S": 41.44, - "RedditClustering": 68.5, - "RedditClusteringP2P": 64.86, - "StackExchangeClustering": 74.16, - "StackExchangeClusteringP2P": 45.1, - "TwentyNewsgroupsClustering": 66.62 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "voyage-large-2-instruct", - "SprintDuplicateQuestions": 94.5, - "TwitterSemEval2015": 86.32, - "TwitterURLCorpus": 86.9 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "voyage-large-2-instruct", - "AskUbuntuDupQuestions": 64.92, - "MindSmallReranking": 30.97, - "SciDocsRR": 89.34, - "StackOverflowDupQuestions": 55.11 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { 
- "Model": "voyage-large-2-instruct", - "ArguAna": 64.06, - "BrightRetrieval (theoremqa_questions)": 26.06, - "BrightRetrieval (earth_science)": 25.09, - "BrightRetrieval (leetcode)": 30.6, - "BrightRetrieval (economics)": 19.85, - "BrightRetrieval (robotics)": 11.21, - "BrightRetrieval (psychology)": 24.79, - "BrightRetrieval (aops)": 7.45, - "BrightRetrieval (sustainable_living)": 15.58, - "BrightRetrieval (pony)": 1.48, - "BrightRetrieval (theoremqa_theorems)": 10.13, - "BrightRetrieval (biology)": 23.55, - "BrightRetrieval (stackoverflow)": 15.03, - "CQADupstackRetrieval": 46.6, - "ClimateFEVER": 32.65, - "DBPedia": 46.03, - "FEVER": 91.47, - "FiQA2018": 59.76, - "HotpotQA": 70.86, - "MSMARCO": 40.6, - "NFCorpus": 40.32, - "NQ": 65.92, - "QuoraRetrieval": 87.4, - "SCIDOCS": 24.32, - "SciFact": 79.99, - "TRECCOVID": 85.07, - "Touche2020": 39.16 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "voyage-large-2-instruct", - "BIOSSES": 89.24, - "SICK-R": 83.16, - "STS12": 73.34, - "STS13": 88.49, - "STS14": 86.49, - "STS15": 91.13, - "STS16": 85.68, - "STS17 (en-en)": 90.06, - "STS22 (en)": 66.32, - "STSBenchmark": 89.22 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "voyage-large-2-instruct", - "SummEval": 30.84 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "voyage-large-2-instruct" - } - ] - } - }, - "rubert-base-cased": { - "BitextMining": { - "f1": [ - { - "Model": "rubert-base-cased", - "Tatoeba (rus-Cyrl_eng-Latn)": 16.76 + "Model": "rubert-base-cased", + "Tatoeba (rus-Cyrl_eng-Latn)": 16.76 } ] }, @@ -7556,83 +7179,6 @@ ] } }, - "instructor-large": { - "BitextMining": { - "f1": [ - { - "Model": "instructor-large" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "instructor-large" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "instructor-large" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "instructor-large" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "instructor-large" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "instructor-large", - "BrightRetrieval (pony)": 1.32, - "BrightRetrieval (sustainable_living)": 13.16, - "BrightRetrieval (aops)": 7.94, - "BrightRetrieval (biology)": 15.61, - "BrightRetrieval (stackoverflow)": 11.21, - "BrightRetrieval (theoremqa_theorems)": 9.29, - "BrightRetrieval (psychology)": 21.94, - "BrightRetrieval (economics)": 15.99, - "BrightRetrieval (robotics)": 11.45, - "BrightRetrieval (leetcode)": 20.0, - "BrightRetrieval (earth_science)": 21.52, - "BrightRetrieval (theoremqa_questions)": 20.07 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "instructor-large" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "instructor-large" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "instructor-large" - } - ] - } - }, "bert-base-multilingual-uncased": { "BitextMining": { "f1": [ @@ -10137,111 +9683,11 @@ ] } }, - "bge-large-en-v1.5": { + "allenai-specter": { "BitextMining": { "f1": [ { - "Model": "bge-large-en-v1.5" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "bge-large-en-v1.5" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "bge-large-en-v1.5" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "bge-large-en-v1.5" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "bge-large-en-v1.5" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "bge-large-en-v1.5", - "AILACasedocs": 25.15, - "AILAStatutes": 20.74, - "ARCChallenge": 9.99, - 
"AlphaNLI": 13.13, - "BrightRetrieval (stackoverflow)": 9.51, - "BrightRetrieval (earth_science)": 24.15, - "BrightRetrieval (aops)": 6.08, - "BrightRetrieval (sustainable_living)": 13.27, - "BrightRetrieval (psychology)": 17.44, - "BrightRetrieval (robotics)": 12.21, - "BrightRetrieval (theoremqa_theorems)": 5.51, - "BrightRetrieval (pony)": 5.64, - "BrightRetrieval (biology)": 11.96, - "BrightRetrieval (theoremqa_questions)": 12.56, - "BrightRetrieval (leetcode)": 26.68, - "BrightRetrieval (economics)": 16.59, - "GerDaLIRSmall": 3.96, - "HellaSwag": 28.5, - "LeCaRDv2": 22.68, - "LegalBenchConsumerContractsQA": 73.52, - "LegalBenchCorporateLobbying": 91.51, - "LegalQuAD": 16.22, - "LegalSummarization": 59.99, - "PIQA": 27.99, - "Quail": 1.83, - "RARbCode": 48.12, - "RARbMath": 57.36, - "SIQA": 1.04, - "SpartQA": 2.99, - "TempReasonL1": 1.46, - "TempReasonL2Fact": 24.25, - "TempReasonL2Pure": 2.35, - "TempReasonL3Fact": 20.64, - "TempReasonL3Pure": 6.67, - "WinoGrande": 19.18 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "bge-large-en-v1.5" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "bge-large-en-v1.5" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "bge-large-en-v1.5" - } - ] - } - }, - "allenai-specter": { - "BitextMining": { - "f1": [ - { - "Model": "allenai-specter" + "Model": "allenai-specter" } ] }, @@ -11312,101 +10758,6 @@ ] } }, - "GritLM-7B": { - "BitextMining": { - "f1": [ - { - "Model": "GritLM-7B" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "GritLM-7B" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "GritLM-7B" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "GritLM-7B" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "GritLM-7B" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "GritLM-7B", - "ARCChallenge": 26.68, - "AlphaNLI": 34.0, - "BrightRetrieval (pony)": 21.98, - "BrightRetrieval (robotics)": 17.31, - "BrightRetrieval (economics)": 19.0, - "BrightRetrieval (theoremqa_questions)": 23.34, - "BrightRetrieval (leetcode)": 29.85, - "BrightRetrieval (earth_science)": 32.77, - "BrightRetrieval (stackoverflow)": 11.62, - "BrightRetrieval (sustainable_living)": 18.04, - "BrightRetrieval (biology)": 25.04, - "BrightRetrieval (psychology)": 19.92, - "BrightRetrieval (theoremqa_theorems)": 17.41, - "BrightRetrieval (aops)": 8.91, - "HellaSwag": 39.45, - "PIQA": 44.35, - "Quail": 11.69, - "RARbCode": 84.0, - "RARbMath": 82.35, - "SIQA": 7.23, - "SpartQA": 9.29, - "TempReasonL1": 7.15, - "TempReasonL2Fact": 58.38, - "TempReasonL2Pure": 11.22, - "TempReasonL3Fact": 44.29, - "TempReasonL3Pure": 14.15, - "WinoGrande": 53.74 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "GritLM-7B" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "GritLM-7B" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "GritLM-7B", - "Core17InstructionRetrieval": 2.62, - "News21InstructionRetrieval": -1.01, - "Robust04InstructionRetrieval": -1.68 - } - ] - } - }, "bge-m3": { "BitextMining": { "f1": [ @@ -13236,163 +12587,6 @@ ] } }, - "gte-Qwen2-7B-instruct": { - "BitextMining": { - "f1": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": 
"gte-Qwen2-7B-instruct" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "gte-Qwen2-7B-instruct", - "BrightRetrieval (earth_science)": 40.66, - "BrightRetrieval (sustainable_living)": 20.82, - "BrightRetrieval (theoremqa_theorems)": 28.15, - "BrightRetrieval (aops)": 15.1, - "BrightRetrieval (economics)": 16.18, - "BrightRetrieval (pony)": 1.25, - "BrightRetrieval (stackoverflow)": 13.95, - "BrightRetrieval (leetcode)": 31.07, - "BrightRetrieval (biology)": 32.09, - "BrightRetrieval (theoremqa_questions)": 29.9, - "BrightRetrieval (robotics)": 12.82, - "BrightRetrieval (psychology)": 26.58 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "gte-Qwen2-7B-instruct" - } - ] - } - }, - "instructor-xl": { - "BitextMining": { - "f1": [ - { - "Model": "instructor-xl" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "instructor-xl" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "instructor-xl" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "instructor-xl" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "instructor-xl" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "instructor-xl", - "BrightRetrieval (aops)": 8.26, - "BrightRetrieval (robotics)": 17.39, - "BrightRetrieval (economics)": 22.81, - "BrightRetrieval (stackoverflow)": 19.06, - "BrightRetrieval (leetcode)": 27.5, - "BrightRetrieval (theoremqa_questions)": 14.59, - "BrightRetrieval (psychology)": 27.43, - "BrightRetrieval (biology)": 21.91, - "BrightRetrieval (theoremqa_theorems)": 6.5, - "BrightRetrieval (earth_science)": 34.35, - "BrightRetrieval (sustainable_living)": 18.82, - "BrightRetrieval (pony)": 5.02 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "instructor-xl" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "instructor-xl" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "instructor-xl", - "Core17InstructionRetrieval": 0.69, - "News21InstructionRetrieval": -0.9, - "Robust04InstructionRetrieval": -8.08 - } - ] - } - }, "mistral-embed": { "BitextMining": { "f1": [ @@ -13734,154 +12928,18 @@ ] } }, - "google-gecko.text-embedding-preview-0409": { + "jina-embeddings-v2-base-en": { "BitextMining": { "f1": [ { - "Model": "google-gecko.text-embedding-preview-0409" + "Model": "jina-embeddings-v2-base-en" } ] }, "Classification": { "accuracy": [ { - "Model": "google-gecko.text-embedding-preview-0409", - "AmazonCounterfactualClassification (en)": 75.34, - "AmazonPolarityClassification": 97.34, - "AmazonReviewsClassification (en)": 51.17, - "Banking77Classification": 88.62, - "EmotionClassification": 52.51, - "ImdbClassification": 95.65, - "MTOPDomainClassification (en)": 98.35, - "MTOPIntentClassification (en)": 83.43, - "MassiveIntentClassification (en)": 80.22, - "MassiveScenarioClassification (en)": 87.19, - "ToxicConversationsClassification": 89.67, - "TweetSentimentExtractionClassification": 74.52 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "ArxivClusteringP2P": 46.27, - "ArxivClusteringS2S": 38.36, - "BiorxivClusteringP2P": 37.87, - "BiorxivClusteringS2S": 35.67, - "MedrxivClusteringP2P": 33.11, - "MedrxivClusteringS2S": 31.54, - "RedditClustering": 65.81, - "RedditClusteringP2P": 66.62, - "StackExchangeClustering": 74.52, - "StackExchangeClusteringP2P": 
37.63, - "TwentyNewsgroupsClustering": 54.87 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "SprintDuplicateQuestions": 96.26, - "TwitterSemEval2015": 79.04, - "TwitterURLCorpus": 87.53 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "AskUbuntuDupQuestions": 64.4, - "MindSmallReranking": 33.07, - "SciDocsRR": 83.59, - "StackOverflowDupQuestions": 54.56 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "ArguAna": 62.18, - "BrightRetrieval (earth_science)": 34.38, - "BrightRetrieval (leetcode)": 29.64, - "BrightRetrieval (theoremqa_questions)": 21.51, - "BrightRetrieval (aops)": 9.33, - "BrightRetrieval (sustainable_living)": 17.25, - "BrightRetrieval (pony)": 3.59, - "BrightRetrieval (theoremqa_theorems)": 16.77, - "BrightRetrieval (stackoverflow)": 17.93, - "BrightRetrieval (biology)": 22.98, - "BrightRetrieval (robotics)": 15.98, - "BrightRetrieval (economics)": 19.5, - "BrightRetrieval (psychology)": 27.86, - "CQADupstackRetrieval": 48.89, - "ClimateFEVER": 33.21, - "DBPedia": 47.12, - "FEVER": 86.96, - "FiQA2018": 59.24, - "HotpotQA": 71.33, - "MSMARCO": 32.58, - "NFCorpus": 40.33, - "NQ": 61.28, - "QuoraRetrieval": 88.18, - "SCIDOCS": 20.34, - "SciFact": 75.42, - "TRECCOVID": 82.62, - "Touche2020": 25.86 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "BIOSSES": 89.46, - "SICK-R": 81.93, - "STS12": 77.59, - "STS13": 90.36, - "STS14": 85.25, - "STS15": 89.66, - "STS16": 87.34, - "STS17 (en-en)": 92.06, - "STS22 (en)": 68.02, - "STSBenchmark": 88.99 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "SummEval": 32.63 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "google-gecko.text-embedding-preview-0409", - "Core17InstructionRetrieval": 5.44, - "News21InstructionRetrieval": 3.94, - "Robust04InstructionRetrieval": -2.4 - } - ] - } - }, - "jina-embeddings-v2-base-en": { - "BitextMining": { - "f1": [ - { - "Model": "jina-embeddings-v2-base-en" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "jina-embeddings-v2-base-en" + "Model": "jina-embeddings-v2-base-en" } ] }, @@ -15850,162 +14908,6 @@ ] } }, - "e5-mistral-7b-instruct": { - "BitextMining": { - "f1": [ - { - "Model": "e5-mistral-7b-instruct", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.75 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "e5-mistral-7b-instruct", - "AmazonReviewsClassification (fr)": 36.71, - "GeoreviewClassification (rus-Cyrl)": 50.25, - "HeadlineClassification (rus-Cyrl)": 85.68, - "InappropriatenessClassification (rus-Cyrl)": 67.19, - "KinopoiskClassification (rus-Cyrl)": 65.49, - "MTOPDomainClassification (fr)": 74.8, - "MTOPIntentClassification (fr)": 53.97, - "MasakhaNEWSClassification (fra)": 80.59, - "MassiveIntentClassification (rus-Cyrl)": 76.08, - "MassiveIntentClassification (fr)": 46.39, - "MassiveScenarioClassification (rus-Cyrl)": 79.61, - "MassiveScenarioClassification (fr)": 53.86, - "RuReviewsClassification (rus-Cyrl)": 67.68, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 64.59, - "RuSciBenchOECDClassification (rus-Cyrl)": 51.13 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "e5-mistral-7b-instruct", - "AlloProfClusteringP2P": 61.06, - "AlloProfClusteringS2S": 28.12, - "GeoreviewClusteringP2P (rus-Cyrl)": 65.68, - "HALClusteringS2S": 19.69, - 
"MLSUMClusteringP2P": 45.59, - "MLSUMClusteringS2S": 32.0, - "MasakhaNEWSClusteringP2P (fra)": 52.47, - "MasakhaNEWSClusteringS2S (fra)": 49.2, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 61.55, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 52.72 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "e5-mistral-7b-instruct", - "OpusparcusPC (rus-Cyrl)": 91.44, - "OpusparcusPC (fr)": 88.5, - "PawsXPairClassification (fr)": 63.65, - "TERRa (rus-Cyrl)": 59.38 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "e5-mistral-7b-instruct", - "AlloprofReranking": 47.36, - "RuBQReranking (rus-Cyrl)": 74.61, - "SyntecReranking": 77.05 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "e5-mistral-7b-instruct", - "AILACasedocs": 38.76, - "AILAStatutes": 38.07, - "ARCChallenge": 17.81, - "AlloprofRetrieval": 16.46, - "AlphaNLI": 26.12, - "BSARDRetrieval": 0.0, - "BrightRetrieval (sustainable_living)": 18.51, - "BrightRetrieval (economics)": 15.49, - "BrightRetrieval (theoremqa_theorems)": 23.78, - "BrightRetrieval (aops)": 7.1, - "BrightRetrieval (theoremqa_questions)": 23.94, - "BrightRetrieval (stackoverflow)": 9.83, - "BrightRetrieval (psychology)": 15.79, - "BrightRetrieval (pony)": 4.81, - "BrightRetrieval (leetcode)": 28.72, - "BrightRetrieval (biology)": 18.84, - "BrightRetrieval (earth_science)": 25.96, - "BrightRetrieval (robotics)": 16.37, - "GerDaLIRSmall": 37.18, - "HellaSwag": 34.85, - "LEMBNarrativeQARetrieval": 44.62, - "LEMBNeedleRetrieval": 48.25, - "LEMBPasskeyRetrieval": 71.0, - "LEMBQMSumRetrieval": 43.63, - "LEMBSummScreenFDRetrieval": 96.82, - "LEMBWikimQARetrieval": 82.11, - "LeCaRDv2": 68.56, - "LegalBenchConsumerContractsQA": 75.46, - "LegalBenchCorporateLobbying": 94.01, - "LegalQuAD": 59.64, - "LegalSummarization": 66.51, - "MintakaRetrieval (fr)": 3.57, - "PIQA": 39.37, - "Quail": 7.01, - "RARbCode": 78.46, - "RARbMath": 72.16, - "RiaNewsRetrieval (rus-Cyrl)": 81.94, - "RuBQRetrieval (rus-Cyrl)": 73.98, - "SIQA": 5.42, - "SpartQA": 9.92, - "SyntecRetrieval": 55.9, - "TempReasonL1": 3.31, - "TempReasonL2Fact": 36.9, - "TempReasonL2Pure": 9.18, - "TempReasonL3Fact": 30.18, - "TempReasonL3Pure": 14.31, - "WinoGrande": 41.21, - "XPQARetrieval (fr)": 41.29 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "e5-mistral-7b-instruct", - "RUParaPhraserSTS (rus-Cyrl)": 76.17, - "RuSTSBenchmarkSTS (rus-Cyrl)": 84.13, - "SICKFr": 64.39, - "STS22 (fr)": 69.82, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 84.25, - "STSBenchmarkMultilingualSTS (fr)": 61.87 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "e5-mistral-7b-instruct", - "SummEvalFr": 32.22 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "e5-mistral-7b-instruct", - "Core17InstructionRetrieval": 0.09, - "News21InstructionRetrieval": -0.86, - "Robust04InstructionRetrieval": -9.59 - } - ] - } - }, "gbert-large": { "BitextMining": { "f1": [ @@ -19942,4872 +18844,5868 @@ ] } }, - "all-mpnet-base-v2": { + "e5-base-v2": { "BitextMining": { "f1": [ { - "Model": "all-mpnet-base-v2", - "BornholmBitextMining (dan-Latn)": 27.44, - "Tatoeba (pol-Latn_eng-Latn)": 4.09, - "Tatoeba (ita-Latn_eng-Latn)": 11.1, - "Tatoeba (cat-Latn_eng-Latn)": 9.44, - "Tatoeba (aze-Latn_eng-Latn)": 1.49, - "Tatoeba (eus-Latn_eng-Latn)": 3.94, - "Tatoeba (epo-Latn_eng-Latn)": 7.15, - "Tatoeba (lit-Latn_eng-Latn)": 1.02, - "Tatoeba (ast-Latn_eng-Latn)": 9.78, - "Tatoeba (bul-Cyrl_eng-Latn)": 0.35, - "Tatoeba (ceb-Latn_eng-Latn)": 4.41, - "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, - "Tatoeba 
(tzl-Latn_eng-Latn)": 3.55, - "Tatoeba (zsm-Latn_eng-Latn)": 4.75, - "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17, - "Tatoeba (pam-Latn_eng-Latn)": 4.32, - "Tatoeba (amh-Ethi_eng-Latn)": 0.0, - "Tatoeba (slv-Latn_eng-Latn)": 3.73, - "Tatoeba (lvs-Latn_eng-Latn)": 2.98, - "Tatoeba (sqi-Latn_eng-Latn)": 3.45, - "Tatoeba (orv-Cyrl_eng-Latn)": 0.0, - "Tatoeba (vie-Latn_eng-Latn)": 4.96, - "Tatoeba (pes-Arab_eng-Latn)": 0.2, - "Tatoeba (por-Latn_eng-Latn)": 10.48, - "Tatoeba (dtp-Latn_eng-Latn)": 3.54, - "Tatoeba (yid-Hebr_eng-Latn)": 0.08, - "Tatoeba (isl-Latn_eng-Latn)": 3.86, - "Tatoeba (cha-Latn_eng-Latn)": 12.2, - "Tatoeba (ron-Latn_eng-Latn)": 7.34, - "Tatoeba (hye-Armn_eng-Latn)": 0.14, - "Tatoeba (mar-Deva_eng-Latn)": 0.11, - "Tatoeba (hin-Deva_eng-Latn)": 0.02, - "Tatoeba (kor-Hang_eng-Latn)": 0.32, - "Tatoeba (srp-Cyrl_eng-Latn)": 1.89, - "Tatoeba (csb-Latn_eng-Latn)": 4.19, - "Tatoeba (jpn-Jpan_eng-Latn)": 1.71, - "Tatoeba (ber-Tfng_eng-Latn)": 4.56, - "Tatoeba (wuu-Hans_eng-Latn)": 0.91, - "Tatoeba (jav-Latn_eng-Latn)": 3.17, - "Tatoeba (nob-Latn_eng-Latn)": 4.37, - "Tatoeba (bre-Latn_eng-Latn)": 3.65, - "Tatoeba (kzj-Latn_eng-Latn)": 3.62, - "Tatoeba (urd-Arab_eng-Latn)": 0.0, - "Tatoeba (ces-Latn_eng-Latn)": 3.56, - "Tatoeba (cbk-Latn_eng-Latn)": 9.33, - "Tatoeba (gla-Latn_eng-Latn)": 2.04, - "Tatoeba (war-Latn_eng-Latn)": 5.14, - "Tatoeba (swh-Latn_eng-Latn)": 6.01, - "Tatoeba (swg-Latn_eng-Latn)": 7.86, - "Tatoeba (glg-Latn_eng-Latn)": 12.0, - "Tatoeba (fao-Latn_eng-Latn)": 7.08, - "Tatoeba (gsw-Latn_eng-Latn)": 10.67, - "Tatoeba (rus-Cyrl_eng-Latn)": 0.14, - "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52, - "Tatoeba (gle-Latn_eng-Latn)": 2.19, - "Tatoeba (slk-Latn_eng-Latn)": 3.4, - "Tatoeba (nno-Latn_eng-Latn)": 5.75, - "Tatoeba (cor-Latn_eng-Latn)": 2.42, - "Tatoeba (nov-Latn_eng-Latn)": 16.61, - "Tatoeba (swe-Latn_eng-Latn)": 6.55, - "Tatoeba (max-Deva_eng-Latn)": 6.46, - "Tatoeba (oci-Latn_eng-Latn)": 8.57, - "Tatoeba (lfn-Latn_eng-Latn)": 6.1, - "Tatoeba (fra-Latn_eng-Latn)": 16.9, - "Tatoeba (ben-Beng_eng-Latn)": 0.0, - "Tatoeba (bel-Cyrl_eng-Latn)": 0.65, - "Tatoeba (lat-Latn_eng-Latn)": 5.78, - "Tatoeba (cmn-Hans_eng-Latn)": 2.22, - "Tatoeba (kat-Geor_eng-Latn)": 0.43, - "Tatoeba (bos-Latn_eng-Latn)": 4.6, - "Tatoeba (xho-Latn_eng-Latn)": 3.3, - "Tatoeba (tha-Thai_eng-Latn)": 0.0, - "Tatoeba (cym-Latn_eng-Latn)": 4.88, - "Tatoeba (deu-Latn_eng-Latn)": 11.46, - "Tatoeba (awa-Deva_eng-Latn)": 0.44, - "Tatoeba (ido-Latn_eng-Latn)": 9.84, - "Tatoeba (tat-Cyrl_eng-Latn)": 0.24, - "Tatoeba (kab-Latn_eng-Latn)": 1.31, - "Tatoeba (uzb-Latn_eng-Latn)": 1.98, - "Tatoeba (heb-Hebr_eng-Latn)": 0.28, - "Tatoeba (ara-Arab_eng-Latn)": 0.1, - "Tatoeba (fry-Latn_eng-Latn)": 12.43, - "Tatoeba (afr-Latn_eng-Latn)": 6.08, - "Tatoeba (kur-Latn_eng-Latn)": 3.65, - "Tatoeba (pms-Latn_eng-Latn)": 7.63, - "Tatoeba (ell-Grek_eng-Latn)": 0.0, - "Tatoeba (spa-Latn_eng-Latn)": 10.12, - "Tatoeba (dsb-Latn_eng-Latn)": 2.96, - "Tatoeba (uig-Arab_eng-Latn)": 0.33, - "Tatoeba (nld-Latn_eng-Latn)": 9.29, - "Tatoeba (tel-Telu_eng-Latn)": 0.73, - "Tatoeba (hrv-Latn_eng-Latn)": 3.77, - "Tatoeba (nds-Latn_eng-Latn)": 10.96, - "Tatoeba (hun-Latn_eng-Latn)": 3.23, - "Tatoeba (est-Latn_eng-Latn)": 2.35, - "Tatoeba (mal-Mlym_eng-Latn)": 0.15, - "Tatoeba (khm-Khmr_eng-Latn)": 0.28, - "Tatoeba (hsb-Latn_eng-Latn)": 3.12, - "Tatoeba (tgl-Latn_eng-Latn)": 4.06, - "Tatoeba (ang-Latn_eng-Latn)": 9.77, - "Tatoeba (tur-Latn_eng-Latn)": 3.16, - "Tatoeba (tuk-Latn_eng-Latn)": 2.23, - "Tatoeba (ile-Latn_eng-Latn)": 17.84, - "Tatoeba 
(mon-Cyrl_eng-Latn)": 0.81, - "Tatoeba (yue-Hant_eng-Latn)": 1.16, - "Tatoeba (ina-Latn_eng-Latn)": 22.55, - "Tatoeba (tam-Taml_eng-Latn)": 0.73, - "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5, - "Tatoeba (dan-Latn_eng-Latn)": 10.01, - "Tatoeba (arq-Arab_eng-Latn)": 0.33, - "Tatoeba (arz-Arab_eng-Latn)": 0.0, - "Tatoeba (fin-Latn_eng-Latn)": 3.82, - "Tatoeba (ind-Latn_eng-Latn)": 4.88 + "Model": "e5-base-v2" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "e5-base-v2" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "e5-base-v2", + "BiorxivClusteringP2P": 37.12, + "BiorxivClusteringS2S": 33.41, + "MedrxivClusteringP2P": 31.82, + "MedrxivClusteringS2S": 29.68, + "RedditClustering": 56.54, + "RedditClusteringP2P": 63.23, + "StackExchangeClustering": 64.6, + "StackExchangeClusteringP2P": 33.02, + "TwentyNewsgroupsClustering": 49.86 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "e5-base-v2" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "e5-base-v2" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "e5-base-v2" + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "e5-base-v2" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "e5-base-v2" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "e5-base-v2", + "Core17InstructionRetrieval": -2.9, + "News21InstructionRetrieval": -2.0, + "Robust04InstructionRetrieval": -6.73 + } + ] + } + }, + "text-search-babbage-001": { + "BitextMining": { + "f1": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-search-babbage-001", + "ArguAna": 49.2, + "ClimateFEVER": 19.9, + "FEVER": 77.0, + "FiQA2018": 42.2, + "HotpotQA": 63.1, + "NFCorpus": 36.7, + "QuoraRetrieval": 69.7, + "SciFact": 70.4, + "TRECCOVID": 58.5, + "Touche2020": 29.7 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text-search-babbage-001" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text-search-babbage-001" + } + ] + } + }, + "all-mpnet-base-v2-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "all-mpnet-base-v2-instruct", + "ARCChallenge": 10.35, + "AlphaNLI": 1.96, + "HellaSwag": 13.01, + "PIQA": 27.18, + "Quail": 3.02, + "RARbCode": 48.95, + "RARbMath": 69.21, + "SIQA": 1.29, + "SpartQA": 1.01, + "TempReasonL1": 1.52, + "TempReasonL2Fact": 7.28, + "TempReasonL2Pure": 1.03, + "TempReasonL3Fact": 7.03, + "TempReasonL3Pure": 5.16, + "WinoGrande": 9.66 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": 
"all-mpnet-base-v2-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + } + }, + "bge-small-en-v1.5-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-small-en-v1.5-instruct", + "ARCChallenge": 7.72, + "AlphaNLI": 1.26, + "HellaSwag": 23.41, + "PIQA": 20.79, + "Quail": 2.01, + "RARbCode": 41.52, + "RARbMath": 46.5, + "SIQA": 0.98, + "SpartQA": 2.86, + "TempReasonL1": 1.27, + "TempReasonL2Fact": 16.72, + "TempReasonL2Pure": 1.1, + "TempReasonL3Fact": 12.81, + "TempReasonL3Pure": 4.63, + "WinoGrande": 5.35 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-small-en-v1.5-instruct" + } + ] + } + }, + "LaBSE-en-ru": { + "BitextMining": { + "f1": [ + { + "Model": "LaBSE-en-ru", + "Tatoeba (rus-Cyrl_eng-Latn)": 93.62 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "LaBSE-en-ru", + "GeoreviewClassification (rus-Cyrl)": 40.89, + "HeadlineClassification (rus-Cyrl)": 68.75, + "InappropriatenessClassification (rus-Cyrl)": 58.48, + "KinopoiskClassification (rus-Cyrl)": 49.85, + "MassiveIntentClassification (swa-Latn)": 19.98, + "MassiveIntentClassification (aze-Latn)": 19.52, + "MassiveIntentClassification (tur-Latn)": 24.12, + "MassiveIntentClassification (cmo-Hans)": 3.96, + "MassiveIntentClassification (amh-Ethi)": 2.76, + "MassiveIntentClassification (kan-Knda)": 2.86, + "MassiveIntentClassification (hin-Deva)": 3.29, + "MassiveIntentClassification (tgl-Latn)": 27.08, + "MassiveIntentClassification (tha-Thai)": 4.0, + "MassiveIntentClassification (swe-Latn)": 32.01, + "MassiveIntentClassification (deu-Latn)": 35.14, + "MassiveIntentClassification (spa-Latn)": 37.67, + "MassiveIntentClassification (por-Latn)": 39.84, + "MassiveIntentClassification (jpn-Jpan)": 4.78, + "MassiveIntentClassification (fin-Latn)": 31.11, + "MassiveIntentClassification (kat-Geor)": 2.87, + "MassiveIntentClassification (slv-Latn)": 35.66, + "MassiveIntentClassification (rus-Cyrl)": 60.53, + "MassiveIntentClassification (ita-Latn)": 43.32, + "MassiveIntentClassification (tel-Telu)": 2.72, + "MassiveIntentClassification (afr-Latn)": 30.59, + "MassiveIntentClassification (isl-Latn)": 25.61, + "MassiveIntentClassification (fas-Arab)": 3.71, + "MassiveIntentClassification (vie-Latn)": 23.0, + "MassiveIntentClassification (ben-Beng)": 3.35, + "MassiveIntentClassification (hye-Armn)": 2.8, + "MassiveIntentClassification (pol-Latn)": 31.3, + "MassiveIntentClassification (cym-Latn)": 26.59, + "MassiveIntentClassification (jav-Latn)": 26.84, + "MassiveIntentClassification (mon-Cyrl)": 35.97, + "MassiveIntentClassification (en)": 60.48, + "MassiveIntentClassification (msa-Latn)": 27.82, + "MassiveIntentClassification (nob-Latn)": 35.78, + "MassiveIntentClassification (heb-Hebr)": 2.33, + "MassiveIntentClassification (khm-Khmr)": 4.6, + "MassiveIntentClassification (nld-Latn)": 34.66, + 
"MassiveIntentClassification (ind-Latn)": 33.31, + "MassiveIntentClassification (mal-Mlym)": 2.63, + "MassiveIntentClassification (tam-Taml)": 2.22, + "MassiveIntentClassification (mya-Mymr)": 3.57, + "MassiveIntentClassification (urd-Arab)": 3.36, + "MassiveIntentClassification (dan-Latn)": 38.66, + "MassiveIntentClassification (cmo-Hant)": 5.29, + "MassiveIntentClassification (ron-Latn)": 37.45, + "MassiveIntentClassification (lav-Latn)": 23.92, + "MassiveIntentClassification (fra-Latn)": 40.29, + "MassiveIntentClassification (ell-Grek)": 11.14, + "MassiveIntentClassification (sqi-Latn)": 35.84, + "MassiveIntentClassification (hun-Latn)": 26.74, + "MassiveIntentClassification (kor-Kore)": 2.69, + "MassiveIntentClassification (ara-Arab)": 5.19, + "MassiveScenarioClassification (swa-Latn)": 25.61, + "MassiveScenarioClassification (aze-Latn)": 24.48, + "MassiveScenarioClassification (tur-Latn)": 31.38, + "MassiveScenarioClassification (cmo-Hans)": 9.98, + "MassiveScenarioClassification (amh-Ethi)": 7.59, + "MassiveScenarioClassification (kan-Knda)": 8.73, + "MassiveScenarioClassification (hin-Deva)": 8.77, + "MassiveScenarioClassification (tgl-Latn)": 35.12, + "MassiveScenarioClassification (tha-Thai)": 8.69, + "MassiveScenarioClassification (swe-Latn)": 35.83, + "MassiveScenarioClassification (deu-Latn)": 41.72, + "MassiveScenarioClassification (spa-Latn)": 43.33, + "MassiveScenarioClassification (por-Latn)": 44.62, + "MassiveScenarioClassification (jpn-Jpan)": 9.51, + "MassiveScenarioClassification (fin-Latn)": 33.79, + "MassiveScenarioClassification (kat-Geor)": 7.32, + "MassiveScenarioClassification (slv-Latn)": 37.6, + "MassiveScenarioClassification (rus-Cyrl)": 65.15, + "MassiveScenarioClassification (ita-Latn)": 47.28, + "MassiveScenarioClassification (tel-Telu)": 7.53, + "MassiveScenarioClassification (afr-Latn)": 37.27, + "MassiveScenarioClassification (isl-Latn)": 30.32, + "MassiveScenarioClassification (fas-Arab)": 6.83, + "MassiveScenarioClassification (vie-Latn)": 28.92, + "MassiveScenarioClassification (ben-Beng)": 8.57, + "MassiveScenarioClassification (hye-Armn)": 8.91, + "MassiveScenarioClassification (pol-Latn)": 33.75, + "MassiveScenarioClassification (cym-Latn)": 30.38, + "MassiveScenarioClassification (jav-Latn)": 33.94, + "MassiveScenarioClassification (mon-Cyrl)": 41.53, + "MassiveScenarioClassification (en)": 65.43, + "MassiveScenarioClassification (msa-Latn)": 36.28, + "MassiveScenarioClassification (nob-Latn)": 42.43, + "MassiveScenarioClassification (heb-Hebr)": 8.64, + "MassiveScenarioClassification (khm-Khmr)": 9.99, + "MassiveScenarioClassification (nld-Latn)": 41.47, + "MassiveScenarioClassification (ind-Latn)": 39.05, + "MassiveScenarioClassification (mal-Mlym)": 7.24, + "MassiveScenarioClassification (tam-Taml)": 7.71, + "MassiveScenarioClassification (mya-Mymr)": 9.94, + "MassiveScenarioClassification (urd-Arab)": 9.16, + "MassiveScenarioClassification (dan-Latn)": 44.69, + "MassiveScenarioClassification (cmo-Hant)": 10.48, + "MassiveScenarioClassification (ron-Latn)": 44.55, + "MassiveScenarioClassification (lav-Latn)": 26.26, + "MassiveScenarioClassification (fra-Latn)": 45.08, + "MassiveScenarioClassification (ell-Grek)": 19.46, + "MassiveScenarioClassification (sqi-Latn)": 40.9, + "MassiveScenarioClassification (hun-Latn)": 33.92, + "MassiveScenarioClassification (kor-Kore)": 7.37, + "MassiveScenarioClassification (ara-Arab)": 12.43, + "RuReviewsClassification (rus-Cyrl)": 58.01, + "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.8, + 
"RuSciBenchOECDClassification (rus-Cyrl)": 40.36 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "LaBSE-en-ru", + "GeoreviewClusteringP2P (rus-Cyrl)": 51.89, + "MLSUMClusteringP2P (rus-Cyrl)": 37.87, + "MLSUMClusteringS2S (rus-Cyrl)": 41.24, + "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 47.48, + "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.16 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "LaBSE-en-ru", + "OpusparcusPC (rus-Cyrl)": 87.18, + "TERRa (rus-Cyrl)": 55.61 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "LaBSE-en-ru", + "RuBQReranking (rus-Cyrl)": 54.83 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "LaBSE-en-ru", + "RiaNewsRetrieval (rus-Cyrl)": 34.73, + "RuBQRetrieval (rus-Cyrl)": 29.03 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "LaBSE-en-ru", + "RUParaPhraserSTS (rus-Cyrl)": 65.87, + "RuSTSBenchmarkSTS (rus-Cyrl)": 73.32, + "STS22 (deu-Latn)": 38.9, + "STS22 (en)": 59.47, + "STS22 (pol-Latn_eng-Latn)": 58.73, + "STS22 (spa-Latn)": 60.85, + "STS22 (fra-Latn)": 74.98, + "STS22 (deu-Latn_eng-Latn)": 47.98, + "STS22 (deu-Latn_fra-Latn)": 59.4, + "STS22 (deu-Latn_pol-Latn)": 39.48, + "STS22 (pol-Latn)": 32.74, + "STS22 (tur-Latn)": 55.04, + "STS22 (spa-Latn_eng-Latn)": 70.8, + "STS22 (rus-Cyrl)": 58.53, + "STS22 (ita-Latn)": 68.58, + "STS22 (fra-Latn_pol-Latn)": 61.98, + "STS22 (spa-Latn_ita-Latn)": 66.83, + "STS22 (cmn-Hans_eng-Latn)": 24.98, + "STS22 (ara-Arab)": 31.85, + "STS22 (cmn-Hans)": 35.1, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 73.02 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "LaBSE-en-ru" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "LaBSE-en-ru" + } + ] + } + }, + "text2vec-base-chinese": { + "BitextMining": { + "f1": [ + { + "Model": "text2vec-base-chinese" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text2vec-base-chinese", + "AmazonReviewsClassification (zh)": 34.12, + "IFlyTek": 42.05, + "JDReview": 82.14, + "MassiveIntentClassification (zh-CN)": 63.98, + "MassiveScenarioClassification (zh-CN)": 70.52, + "MultilingualSentiment": 60.98, + "OnlineShopping": 85.69, + "TNews": 43.01, + "Waimai": 77.22 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text2vec-base-chinese", + "CLSClusteringP2P": 35.27, + "CLSClusteringS2S": 32.42, + "ThuNewsClusteringP2P": 42.92, + "ThuNewsClusteringS2S": 40.01 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text2vec-base-chinese", + "Cmnli": 73.87, + "Ocnli": 60.95 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text2vec-base-chinese", + "CMedQAv1": 59.26, + "CMedQAv2": 59.82, + "MMarcoReranking": 12.76, + "T2Reranking": 65.95 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text2vec-base-chinese", + "CmedqaRetrieval": 15.91, + "CovidRetrieval": 44.81, + "DuRetrieval": 52.23, + "EcomRetrieval": 34.6, + "MMarcoRetrieval": 44.06, + "MedicalRetrieval": 27.56, + "T2Retrieval": 51.67, + "VideoRetrieval": 39.52 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text2vec-base-chinese", + "AFQMC": 26.06, + "ATEC": 31.93, + "BQ": 42.67, + "LCQMC": 70.16, + "PAWSX": 17.21, + "QBQTC": 24.62, + "STS22 (zh)": 55.35, + "STSB": 79.3 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text2vec-base-chinese" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text2vec-base-chinese" + } + ] + } + }, + "herbert-base-retrieval-v2": { + "BitextMining": { + "f1": [ + { + "Model": "herbert-base-retrieval-v2" + } + ] + }, + 
"Classification": { + "accuracy": [ + { + "Model": "herbert-base-retrieval-v2", + "AllegroReviews": 34.11, + "CBD": 68.35, + "MassiveIntentClassification (pl)": 65.53, + "MassiveScenarioClassification (pl)": 68.51, + "PAC": 68.4, + "PolEmo2.0-IN": 64.18, + "PolEmo2.0-OUT": 45.73 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "herbert-base-retrieval-v2", + "8TagsClustering": 28.15 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "herbert-base-retrieval-v2", + "CDSC-E": 63.31, + "PPC": 84.18, + "PSC": 98.87, + "SICK-E-PL": 54.93 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "herbert-base-retrieval-v2" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "herbert-base-retrieval-v2", + "ArguAna-PL": 41.97, + "DBPedia-PL": 24.07, + "FiQA-PL": 24.25, + "HotpotQA-PL": 43.41, + "MSMARCO-PL": 51.56, + "NFCorpus-PL": 25.95, + "NQ-PL": 35.09, + "Quora-PL": 78.86, + "SCIDOCS-PL": 11.0, + "SciFact-PL": 51.92, + "TRECCOVID-PL": 42.64 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "herbert-base-retrieval-v2", + "CDSC-R": 86.18, + "SICK-R-PL": 64.67, + "STS22 (pl)": 39.73 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "herbert-base-retrieval-v2" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "herbert-base-retrieval-v2" + } + ] + } + }, + "voyage-law-2": { + "BitextMining": { + "f1": [ + { + "Model": "voyage-law-2" } ] }, "Classification": { "accuracy": [ { - "Model": "all-mpnet-base-v2", - "AllegroReviews (pol-Latn)": 22.99, - "AmazonCounterfactualClassification (en-ext)": 67.5, - "AmazonCounterfactualClassification (en)": 65.03, - "AmazonCounterfactualClassification (deu-Latn)": 55.66, - "AmazonCounterfactualClassification (jpn-Jpan)": 60.69, - "AmazonPolarityClassification": 67.14, - "AmazonReviewsClassification (en)": 31.44, - "AmazonReviewsClassification (deu-Latn)": 26.05, - "AmazonReviewsClassification (spa-Latn)": 27.73, - "AmazonReviewsClassification (fra-Latn)": 28.49, - "AmazonReviewsClassification (jpn-Jpan)": 23.65, - "AmazonReviewsClassification (cmn-Hans)": 23.62, - "AngryTweetsClassification (dan-Latn)": 44.13, - "Banking77Classification": 81.7, - "CBD (pol-Latn)": 50.25, - "DanishPoliticalCommentsClassification (dan-Latn)": 28.31, - "EmotionClassification": 42.22, - "GeoreviewClassification (rus-Cyrl)": 25.93, - "HeadlineClassification (rus-Cyrl)": 28.53, - "IFlyTek (cmn-Hans)": 17.18, - "ImdbClassification": 71.17, - "InappropriatenessClassification (rus-Cyrl)": 51.82, - "JDReview (cmn-Hans)": 60.19, - "KinopoiskClassification (rus-Cyrl)": 34.18, - "LccSentimentClassification (dan-Latn)": 39.27, - "MTOPDomainClassification (en)": 91.89, - "MTOPDomainClassification (deu-Latn)": 71.86, - "MTOPDomainClassification (spa-Latn)": 71.3, - "MTOPDomainClassification (fra-Latn)": 74.88, - "MTOPDomainClassification (hin-Deva)": 39.93, - "MTOPDomainClassification (tha-Thai)": 17.54, - "MTOPIntentClassification (en)": 68.27, - "MTOPIntentClassification (deu-Latn)": 44.36, - "MTOPIntentClassification (spa-Latn)": 39.48, - "MTOPIntentClassification (fra-Latn)": 37.57, - "MTOPIntentClassification (hin-Deva)": 18.63, - "MTOPIntentClassification (tha-Thai)": 5.42, - "MasakhaNEWSClassification (amh-Ethi)": 36.49, - "MasakhaNEWSClassification (eng)": 79.75, - "MasakhaNEWSClassification (fra-Latn)": 77.77, - "MasakhaNEWSClassification (hau-Latn)": 59.22, - "MasakhaNEWSClassification (ibo-Latn)": 61.64, - "MasakhaNEWSClassification (lin-Latn)": 74.0, - "MasakhaNEWSClassification (lug-Latn)": 58.43, - 
"MasakhaNEWSClassification (orm-Ethi)": 48.15, - "MasakhaNEWSClassification (pcm-Latn)": 92.2, - "MasakhaNEWSClassification (run-Latn)": 64.72, - "MasakhaNEWSClassification (sna-Latn)": 73.69, - "MasakhaNEWSClassification (som-Latn)": 49.97, - "MasakhaNEWSClassification (swa-Latn)": 55.15, - "MasakhaNEWSClassification (tir-Ethi)": 27.46, - "MasakhaNEWSClassification (xho-Latn)": 60.98, - "MasakhaNEWSClassification (yor-Latn)": 63.33, - "MassiveIntentClassification (en)": 69.76, - "MassiveIntentClassification (jav-Latn)": 31.75, - "MassiveIntentClassification (fra-Latn)": 44.27, - "MassiveIntentClassification (msa-Latn)": 30.53, - "MassiveIntentClassification (hun-Latn)": 34.38, - "MassiveIntentClassification (pol-Latn)": 34.26, - "MassiveIntentClassification (nld-Latn)": 38.49, - "MassiveIntentClassification (tha-Thai)": 8.51, - "MassiveIntentClassification (tur-Latn)": 32.02, - "MassiveIntentClassification (tam-Taml)": 9.25, - "MassiveIntentClassification (hye-Armn)": 10.11, - "MassiveIntentClassification (khm-Khmr)": 4.74, - "MassiveIntentClassification (lav-Latn)": 35.08, - "MassiveIntentClassification (deu-Latn)": 44.54, - "MassiveIntentClassification (spa-Latn)": 39.75, - "MassiveIntentClassification (ben-Beng)": 12.35, - "MassiveIntentClassification (por-Latn)": 42.83, - "MassiveIntentClassification (ara-Arab)": 20.42, - "MassiveIntentClassification (cym-Latn)": 30.82, - "MassiveIntentClassification (dan-Latn)": 42.36, - "MassiveIntentClassification (mya-Mymr)": 4.6, - "MassiveIntentClassification (heb-Hebr)": 23.6, - "MassiveIntentClassification (kan-Knda)": 3.76, - "MassiveIntentClassification (swa-Latn)": 31.82, - "MassiveIntentClassification (fas-Arab)": 22.45, - "MassiveIntentClassification (hin-Deva)": 17.68, - "MassiveIntentClassification (kat-Geor)": 7.66, - "MassiveIntentClassification (mal-Mlym)": 2.64, - "MassiveIntentClassification (fin-Latn)": 34.58, - "MassiveIntentClassification (slv-Latn)": 34.49, - "MassiveIntentClassification (afr-Latn)": 36.49, - "MassiveIntentClassification (urd-Arab)": 12.86, - "MassiveIntentClassification (ron-Latn)": 38.07, - "MassiveIntentClassification (sqi-Latn)": 37.26, - "MassiveIntentClassification (cmo-Hant)": 22.43, - "MassiveIntentClassification (ita-Latn)": 40.29, - "MassiveIntentClassification (ind-Latn)": 36.31, - "MassiveIntentClassification (nob-Latn)": 39.3, - "MassiveIntentClassification (jpn-Jpan)": 33.13, - "MassiveIntentClassification (aze-Latn)": 28.92, - "MassiveIntentClassification (mon-Cyrl)": 19.65, - "MassiveIntentClassification (ell-Grek)": 24.52, - "MassiveIntentClassification (rus-Cyrl)": 23.98, - "MassiveIntentClassification (kor-Kore)": 13.35, - "MassiveIntentClassification (cmo-Hans)": 24.36, - "MassiveIntentClassification (isl-Latn)": 31.46, - "MassiveIntentClassification (swe-Latn)": 39.02, - "MassiveIntentClassification (tel-Telu)": 2.26, - "MassiveIntentClassification (vie-Latn)": 31.47, - "MassiveIntentClassification (tgl-Latn)": 36.33, - "MassiveIntentClassification (amh-Ethi)": 2.39, - "MassiveScenarioClassification (en)": 75.67, - "MassiveScenarioClassification (tur-Latn)": 39.11, - "MassiveScenarioClassification (kat-Geor)": 13.45, - "MassiveScenarioClassification (jpn-Jpan)": 40.57, - "MassiveScenarioClassification (spa-Latn)": 50.92, - "MassiveScenarioClassification (fas-Arab)": 27.8, - "MassiveScenarioClassification (hun-Latn)": 41.01, - "MassiveScenarioClassification (jav-Latn)": 40.0, - "MassiveScenarioClassification (por-Latn)": 52.06, - "MassiveScenarioClassification (sqi-Latn)": 44.67, - 
"MassiveScenarioClassification (lav-Latn)": 39.28, - "MassiveScenarioClassification (deu-Latn)": 54.09, - "MassiveScenarioClassification (nld-Latn)": 47.79, - "MassiveScenarioClassification (mon-Cyrl)": 25.58, - "MassiveScenarioClassification (swa-Latn)": 40.34, - "MassiveScenarioClassification (ben-Beng)": 17.49, - "MassiveScenarioClassification (cym-Latn)": 34.82, - "MassiveScenarioClassification (swe-Latn)": 44.53, - "MassiveScenarioClassification (rus-Cyrl)": 28.71, - "MassiveScenarioClassification (fra-Latn)": 54.26, - "MassiveScenarioClassification (dan-Latn)": 49.45, - "MassiveScenarioClassification (mya-Mymr)": 10.8, - "MassiveScenarioClassification (ron-Latn)": 47.86, - "MassiveScenarioClassification (cmo-Hans)": 35.33, - "MassiveScenarioClassification (hin-Deva)": 23.13, - "MassiveScenarioClassification (cmo-Hant)": 31.7, - "MassiveScenarioClassification (afr-Latn)": 43.63, - "MassiveScenarioClassification (aze-Latn)": 36.42, - "MassiveScenarioClassification (msa-Latn)": 37.28, - "MassiveScenarioClassification (ell-Grek)": 33.85, - "MassiveScenarioClassification (isl-Latn)": 39.36, - "MassiveScenarioClassification (fin-Latn)": 38.41, - "MassiveScenarioClassification (ind-Latn)": 43.05, - "MassiveScenarioClassification (pol-Latn)": 42.66, - "MassiveScenarioClassification (tam-Taml)": 14.55, - "MassiveScenarioClassification (ita-Latn)": 51.37, - "MassiveScenarioClassification (urd-Arab)": 20.0, - "MassiveScenarioClassification (kan-Knda)": 8.34, - "MassiveScenarioClassification (tel-Telu)": 7.81, - "MassiveScenarioClassification (mal-Mlym)": 7.69, - "MassiveScenarioClassification (ara-Arab)": 27.8, - "MassiveScenarioClassification (kor-Kore)": 17.28, - "MassiveScenarioClassification (vie-Latn)": 35.9, - "MassiveScenarioClassification (amh-Ethi)": 7.43, - "MassiveScenarioClassification (heb-Hebr)": 25.49, - "MassiveScenarioClassification (hye-Armn)": 16.86, - "MassiveScenarioClassification (khm-Khmr)": 9.63, - "MassiveScenarioClassification (slv-Latn)": 39.88, - "MassiveScenarioClassification (tgl-Latn)": 47.04, - "MassiveScenarioClassification (nob-Latn)": 45.75, - "MassiveScenarioClassification (tha-Thai)": 17.01, - "MultilingualSentiment (cmn-Hans)": 41.2, - "NoRecClassification (nob-Latn)": 38.34, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15, - "OnlineShopping (cmn-Hans)": 56.94, - "PAC (pol-Latn)": 62.1, - "PolEmo2.0-IN (pol-Latn)": 41.63, - "PolEmo2.0-OUT (pol-Latn)": 25.0, - "RuReviewsClassification (rus-Cyrl)": 42.33, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 13.29, - "RuSciBenchOECDClassification (rus-Cyrl)": 10.62, - "TNews (cmn-Hans)": 21.05, - "ToxicConversationsClassification": 61.05, - "TweetSentimentExtractionClassification": 55.05, - "Waimai (cmn-Hans)": 63.31 + "Model": "voyage-law-2", + "AmazonReviewsClassification (fr)": 41.98, + "MTOPDomainClassification (fr)": 90.12, + "MTOPIntentClassification (fr)": 62.44, + "MasakhaNEWSClassification (fra)": 76.42, + "MassiveIntentClassification (fr)": 66.94, + "MassiveScenarioClassification (fr)": 72.78 } ] }, "Clustering": { "v_measure": [ { - "Model": "all-mpnet-base-v2", - "ArxivClusteringP2P": 48.38, - "ArxivClusteringS2S": 39.72, - "BiorxivClusteringP2P": 39.62, - "BiorxivClusteringS2S": 35.02, - "GeoreviewClusteringP2P (rus-Cyrl)": 20.33, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49, - "MasakhaNEWSClusteringP2P (eng)": 67.24, - "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99, - "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0, 
- "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68, - "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01, - "MasakhaNEWSClusteringP2P (run-Latn)": 57.6, - "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99, - "MasakhaNEWSClusteringP2P (som-Latn)": 31.16, - "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85, - "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24, - "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48, - "MasakhaNEWSClusteringS2S (eng)": 35.69, - "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05, - "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63, - "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72, - "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7, - "MasakhaNEWSClusteringS2S (run-Latn)": 52.27, - "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64, - "MasakhaNEWSClusteringS2S (som-Latn)": 30.94, - "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01, - "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16, - "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04, - "MedrxivClusteringP2P": 35.58, - "MedrxivClusteringS2S": 32.87, - "RedditClustering": 54.82, - "RedditClusteringP2P": 56.77, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 14.66, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 12.49, - "StackExchangeClustering": 53.8, - "StackExchangeClusteringP2P": 34.28, - "TwentyNewsgroupsClustering": 49.74 + "Model": "voyage-law-2", + "AlloProfClusteringP2P": 62.5, + "AlloProfClusteringS2S": 44.28, + "HALClusteringS2S": 26.36, + "MLSUMClusteringP2P (fr)": 44.03, + "MLSUMClusteringS2S (fr)": 42.95, + "MasakhaNEWSClusteringP2P (fra)": 50.68, + "MasakhaNEWSClusteringS2S (fra)": 38.79 } ] }, "PairClassification": { "ap": [ { - "Model": "all-mpnet-base-v2", - "CDSC-E (pol-Latn)": 45.37, - "OpusparcusPC (deu-Latn)": 89.78, - "OpusparcusPC (en)": 97.75, - "OpusparcusPC (fin-Latn)": 85.82, - "OpusparcusPC (fra-Latn)": 86.61, - "OpusparcusPC (rus-Cyrl)": 79.85, - "OpusparcusPC (swe-Latn)": 81.81, - "PSC (pol-Latn)": 83.28, - "PawsXPairClassification (deu-Latn)": 52.17, - "PawsXPairClassification (en)": 61.99, - "PawsXPairClassification (spa-Latn)": 55.06, - "PawsXPairClassification (fra-Latn)": 56.42, - "PawsXPairClassification (jpn-Hira)": 47.43, - "PawsXPairClassification (kor-Hang)": 49.75, - "PawsXPairClassification (cmn-Hans)": 52.47, - "SICK-E-PL (pol-Latn)": 46.51, - "SprintDuplicateQuestions": 90.15, - "TERRa (rus-Cyrl)": 44.52, - "TwitterSemEval2015": 73.85, - "TwitterURLCorpus": 85.11 + "Model": "voyage-law-2", + "OpusparcusPC (fr)": 93.06, + "PawsXPairClassification (fr)": 61.54 } ] }, "Reranking": { "map": [ { - "Model": "all-mpnet-base-v2", - "AlloprofReranking (fra-Latn)": 69.63, - "AskUbuntuDupQuestions": 65.85, - "MMarcoReranking (cmn-Hans)": 4.65, - "MindSmallReranking": 30.97, - "RuBQReranking (rus-Cyrl)": 30.96, - "SciDocsRR": 88.65, - "StackOverflowDupQuestions": 51.98, - "SyntecReranking (fra-Latn)": 66.12, - "T2Reranking (cmn-Hans)": 58.3 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "all-mpnet-base-v2", - "AILACasedocs": 22.51, - "AILAStatutes": 21.27, - "ARCChallenge": 11.8, - "AlloprofRetrieval (fra-Latn)": 34.27, - "AlphaNLI": 22.41, - "ArguAna": 46.52, - "ArguAna-PL (pol-Latn)": 14.72, - "BSARDRetrieval (fra-Latn)": 6.98, - "BrightRetrieval (robotics)": 
8.36, - "BrightRetrieval (psychology)": 22.63, - "BrightRetrieval (leetcode)": 26.4, - "BrightRetrieval (biology)": 15.52, - "BrightRetrieval (theoremqa_questions)": 18.49, - "BrightRetrieval (economics)": 16.64, - "BrightRetrieval (stackoverflow)": 9.48, - "BrightRetrieval (pony)": 6.95, - "BrightRetrieval (earth_science)": 20.11, - "BrightRetrieval (theoremqa_theorems)": 12.38, - "BrightRetrieval (sustainable_living)": 15.34, - "BrightRetrieval (aops)": 5.32, - "CQADupstackRetrieval": 44.96, - "ClimateFEVER": 21.97, - "CmedqaRetrieval (cmn-Hans)": 2.0, - "CovidRetrieval (cmn-Hans)": 3.7, - "DBPedia": 32.09, - "DuRetrieval (cmn-Hans)": 4.92, - "EcomRetrieval (cmn-Hans)": 3.94, - "FEVER": 50.86, - "FiQA-PL (pol-Latn)": 3.6, - "FiQA2018": 49.96, - "GerDaLIRSmall (deu-Latn)": 3.78, - "HellaSwag": 26.27, - "HotpotQA": 39.29, - "LEMBNarrativeQARetrieval": 19.34, - "LEMBNeedleRetrieval": 16.0, - "LEMBPasskeyRetrieval": 24.5, - "LEMBQMSumRetrieval": 21.54, - "LEMBSummScreenFDRetrieval": 60.43, - "LEMBWikimQARetrieval": 44.92, - "LeCaRDv2 (zho-Hans)": 18.09, - "LegalBenchConsumerContractsQA": 75.25, - "LegalBenchCorporateLobbying": 89.04, - "LegalQuAD (deu-Latn)": 10.67, - "LegalSummarization": 58.55, - "MMarcoRetrieval (cmn-Hans)": 7.13, - "MSMARCO": 39.75, - "MedicalRetrieval (cmn-Hans)": 1.71, - "MintakaRetrieval (ara-Arab)": 1.97, - "MintakaRetrieval (deu-Latn)": 17.21, - "MintakaRetrieval (spa-Latn)": 10.11, - "MintakaRetrieval (fra-Latn)": 12.93, - "MintakaRetrieval (hin-Deva)": 2.05, - "MintakaRetrieval (ita-Latn)": 5.63, - "MintakaRetrieval (jpn-Hira)": 6.72, - "MintakaRetrieval (por-Latn)": 8.05, - "NFCorpus": 33.29, - "NFCorpus-PL (pol-Latn)": 8.77, - "NQ": 50.45, - "PIQA": 29.03, - "Quail": 3.41, - "QuoraRetrieval": 87.46, - "RARbCode": 53.21, - "RARbMath": 71.85, - "RuBQRetrieval (rus-Cyrl)": 4.75, - "SCIDOCS": 23.76, - "SCIDOCS-PL (pol-Latn)": 4.02, - "SIQA": 2.38, - "SciFact": 65.57, - "SciFact-PL (pol-Latn)": 13.31, - "SpartQA": 0.22, - "SyntecRetrieval (fra-Latn)": 57.39, - "T2Retrieval (cmn-Hans)": 2.98, - "TRECCOVID": 51.33, - "TRECCOVID-PL (pol-Latn)": 12.12, - "TempReasonL1": 1.77, - "TempReasonL2Fact": 11.2, - "TempReasonL2Pure": 1.15, - "TempReasonL3Fact": 9.42, - "TempReasonL3Pure": 5.59, - "Touche2020": 19.93, - "VideoRetrieval (cmn-Hans)": 8.48, - "WinoGrande": 20.8, - "XPQARetrieval (ara-Arab_ara-Arab)": 9.42, - "XPQARetrieval (eng-Latn_ara-Arab)": 2.39, - "XPQARetrieval (ara-Arab_eng-Latn)": 8.98, - "XPQARetrieval (deu-Latn_deu-Latn)": 55.82, - "XPQARetrieval (eng-Latn_deu-Latn)": 11.74, - "XPQARetrieval (deu-Latn_eng-Latn)": 30.44, - "XPQARetrieval (spa-Latn_spa-Latn)": 40.01, - "XPQARetrieval (eng-Latn_spa-Latn)": 6.12, - "XPQARetrieval (spa-Latn_eng-Latn)": 29.44, - "XPQARetrieval (fra-Latn_fra-Latn)": 51.94, - "XPQARetrieval (eng-Latn_fra-Latn)": 11.48, - "XPQARetrieval (fra-Latn_eng-Latn)": 32.52, - "XPQARetrieval (hin-Deva_hin-Deva)": 37.48, - "XPQARetrieval (eng-Latn_hin-Deva)": 5.11, - "XPQARetrieval (hin-Deva_eng-Latn)": 7.37, - "XPQARetrieval (ita-Latn_ita-Latn)": 54.2, - "XPQARetrieval (eng-Latn_ita-Latn)": 6.08, - "XPQARetrieval (ita-Latn_eng-Latn)": 30.32, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.45, - "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79, - "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77, - "XPQARetrieval (kor-Hang_kor-Hang)": 10.4, - "XPQARetrieval (eng-Latn_kor-Hang)": 7.09, - "XPQARetrieval (kor-Hang_eng-Latn)": 6.95, - "XPQARetrieval (pol-Latn_pol-Latn)": 23.67, - "XPQARetrieval (eng-Latn_pol-Latn)": 8.83, - "XPQARetrieval (pol-Latn_eng-Latn)": 15.94, 
- "XPQARetrieval (por-Latn_por-Latn)": 33.56, - "XPQARetrieval (eng-Latn_por-Latn)": 3.76, - "XPQARetrieval (por-Latn_eng-Latn)": 23.45, - "XPQARetrieval (tam-Taml_tam-Taml)": 5.53, - "XPQARetrieval (eng-Latn_tam-Taml)": 3.3, - "XPQARetrieval (tam-Taml_eng-Latn)": 4.0, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.84, - "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2, - "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84 + "Model": "voyage-law-2", + "AlloprofReranking": 72.92, + "SyntecReranking": 91.2 } - ] - }, - "STS": { - "spearman": [ - { - "Model": "all-mpnet-base-v2", - "AFQMC (cmn-Hans)": 8.01, - "ATEC (cmn-Hans)": 14.03, - "BIOSSES": 80.43, - "BQ (cmn-Hans)": 21.39, - "CDSC-R (pol-Latn)": 77.04, - "LCQMC (cmn-Hans)": 22.84, - "PAWSX (cmn-Hans)": 6.44, - "RUParaPhraserSTS (rus-Cyrl)": 42.15, - "RuSTSBenchmarkSTS (rus-Cyrl)": 55.68, - "SICK-R": 80.59, - "SICK-R-PL (pol-Latn)": 50.2, - "SICKFr (fra-Latn)": 67.05, - "STS12": 72.63, - "STS13": 83.48, - "STS14": 78.0, - "STS15": 85.66, - "STS16": 80.03, - "STS17 (en-en)": 90.6, - "STS17 (eng-Latn_ara-Arab)": 6.76, - "STS17 (fra-Latn_eng-Latn)": 41.64, - "STS17 (eng-Latn_tur-Latn)": -4.58, - "STS17 (eng-Latn_deu-Latn)": 35.5, - "STS17 (spa-Latn_eng-Latn)": 25.28, - "STS17 (ita-Latn_eng-Latn)": 31.8, - "STS17 (spa-Latn)": 78.4, - "STS17 (kor-Hang)": 39.11, - "STS17 (ara-Arab)": 55.42, - "STS17 (nld-Latn_eng-Latn)": 32.89, - "STS22 (en)": 68.39, - "STS22 (spa-Latn_eng-Latn)": 55.09, - "STS22 (deu-Latn_pol-Latn)": 23.53, - "STS22 (cmn-Hans_eng-Latn)": 40.47, - "STS22 (pol-Latn)": 24.21, - "STS22 (tur-Latn)": 29.35, - "STS22 (spa-Latn_ita-Latn)": 41.61, - "STS22 (fra-Latn_pol-Latn)": 73.25, - "STS22 (rus-Cyrl)": 15.83, - "STS22 (deu-Latn)": 27.0, - "STS22 (spa-Latn)": 55.98, - "STS22 (pol-Latn_eng-Latn)": 51.07, - "STS22 (fra-Latn)": 77.1, - "STS22 (deu-Latn_eng-Latn)": 49.73, - "STS22 (ara-Arab)": 38.96, - "STS22 (deu-Latn_fra-Latn)": 31.39, - "STS22 (ita-Latn)": 58.02, - "STS22 (cmn-Hans)": 42.24, - "STSB (cmn-Hans)": 37.7, - "STSBenchmark": 83.42, - "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54, - "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15, - "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72, - "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78, - "STSBenchmarkMultilingualSTS (en)": 83.42, - "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43, - "STSBenchmarkMultilingualSTS (por-Latn)": 62.12, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43, - "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36 + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "voyage-law-2", + "AILACasedocs": 44.56, + "AILAStatutes": 45.51, + "AlloprofRetrieval": 57.28, + "BSARDRetrieval": 11.83, + "GerDaLIRSmall": 44.91, + "LEMBNarrativeQARetrieval": 55.78, + "LEMBNeedleRetrieval": 80.5, + "LEMBPasskeyRetrieval": 93.75, + "LEMBQMSumRetrieval": 57.26, + "LEMBSummScreenFDRetrieval": 98.72, + "LEMBWikimQARetrieval": 87.08, + "LeCaRDv2": 72.75, + "LegalBenchConsumerContractsQA": 83.27, + "LegalBenchCorporateLobbying": 95.66, + "LegalQuAD": 67.47, + "LegalSummarization": 68.96, + "MintakaRetrieval (fr)": 34.92, + "SyntecRetrieval": 87.33, + "XPQARetrieval (fr)": 73.56 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "voyage-law-2", + "SICKFr": 74.09, + "STS22 (fr)": 83.75, + "STSBenchmarkMultilingualSTS (fr)": 83.02 } ] }, "Summarization": { "spearman": [ { - "Model": "all-mpnet-base-v2", - "SummEval": 27.49, - "SummEvalFr (fra-Latn)": 28.11 + "Model": "voyage-law-2", + "SummEvalFr": 30.34 } ] }, "InstructionRetrieval": { "p-MRR": [ 
{ - "Model": "all-mpnet-base-v2" + "Model": "voyage-law-2" } ] } }, - "e5-base-v2": { + "bert-base-multilingual-cased": { "BitextMining": { "f1": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased" } ] }, "Classification": { "accuracy": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased", + "AmazonReviewsClassification (fr)": 29.39, + "MTOPDomainClassification (fr)": 63.61, + "MTOPIntentClassification (fr)": 37.84, + "MasakhaNEWSClassification (fra)": 64.0, + "MassiveIntentClassification (fr)": 37.3, + "MassiveScenarioClassification (fr)": 44.47 } ] }, "Clustering": { "v_measure": [ { - "Model": "e5-base-v2", - "BiorxivClusteringP2P": 37.12, - "BiorxivClusteringS2S": 33.41, - "MedrxivClusteringP2P": 31.82, - "MedrxivClusteringS2S": 29.68, - "RedditClustering": 56.54, - "RedditClusteringP2P": 63.23, - "StackExchangeClustering": 64.6, - "StackExchangeClusteringP2P": 33.02, - "TwentyNewsgroupsClustering": 49.86 + "Model": "bert-base-multilingual-cased", + "AlloProfClusteringP2P": 51.5, + "AlloProfClusteringS2S": 43.06, + "HALClusteringS2S": 20.81, + "MLSUMClusteringP2P": 40.9, + "MLSUMClusteringS2S": 31.8, + "MasakhaNEWSClusteringP2P (fra)": 24.23, + "MasakhaNEWSClusteringS2S (fra)": 24.46 } ] }, "PairClassification": { "ap": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased", + "OpusparcusPC (fr)": 86.77, + "PawsXPairClassification (fr)": 53.39 } ] }, "Reranking": { "map": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased", + "AlloprofReranking": 36.23, + "SyntecReranking": 53.25 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased", + "AlloprofRetrieval": 1.63, + "BSARDRetrieval": 0.0, + "MintakaRetrieval (fr)": 3.55, + "SyntecRetrieval": 18.95, + "XPQARetrieval (fr)": 18.49 } ] }, "STS": { "spearman": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased", + "SICKFr": 58.75, + "STS22 (fr)": 39.05, + "STSBenchmarkMultilingualSTS (fr)": 52.25 } ] }, "Summarization": { "spearman": [ { - "Model": "e5-base-v2" + "Model": "bert-base-multilingual-cased", + "SummEvalFr": 28.81 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "e5-base-v2", - "Core17InstructionRetrieval": -2.9, - "News21InstructionRetrieval": -2.0, - "Robust04InstructionRetrieval": -6.73 + "Model": "bert-base-multilingual-cased" } ] } }, - "text-search-babbage-001": { + "LLM2Vec-Meta-Llama-3-unsupervised": { "BitextMining": { "f1": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised" } ] }, "Classification": { "accuracy": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "AmazonCounterfactualClassification (en)": 75.7, + "AmazonPolarityClassification": 80.68, + "AmazonReviewsClassification (en)": 40.0, + "Banking77Classification": 84.77, + "EmotionClassification": 47.08, + "ImdbClassification": 75.19, + "MTOPDomainClassification (en)": 94.47, + "MTOPIntentClassification (en)": 81.09, + "MassiveIntentClassification (en)": 75.01, + "MassiveScenarioClassification (en)": 79.16, + "ToxicConversationsClassification": 71.85, + "TweetSentimentExtractionClassification": 57.61 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "ArxivClusteringP2P": 49.22, + "ArxivClusteringS2S": 41.71, + "BiorxivClusteringP2P": 38.39, + "BiorxivClusteringS2S": 31.31, + "MedrxivClusteringP2P": 31.47, + "MedrxivClusteringS2S": 27.87, + "RedditClustering": 43.67, + 
"RedditClusteringP2P": 61.67, + "StackExchangeClustering": 68.2, + "StackExchangeClusteringP2P": 36.36, + "TwentyNewsgroupsClustering": 32.01 } ] }, "PairClassification": { "ap": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "SprintDuplicateQuestions": 88.14, + "TwitterSemEval2015": 66.6, + "TwitterURLCorpus": 79.3 } ] }, "Reranking": { "map": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "AskUbuntuDupQuestions": 57.16, + "MindSmallReranking": 30.1, + "SciDocsRR": 76.28, + "StackOverflowDupQuestions": 48.82 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-search-babbage-001", - "ArguAna": 49.2, - "ClimateFEVER": 19.9, - "FEVER": 77.0, - "FiQA2018": 42.2, - "HotpotQA": 63.1, - "NFCorpus": 36.7, - "QuoraRetrieval": 69.7, - "SciFact": 70.4, - "TRECCOVID": 58.5, - "Touche2020": 29.7 + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "ArguAna": 51.73, + "CQADupstackRetrieval": 32.4, + "ClimateFEVER": 23.58, + "DBPedia": 26.78, + "FEVER": 53.42, + "FiQA2018": 28.56, + "HotpotQA": 52.37, + "MSMARCO": 17.47, + "NFCorpus": 26.28, + "NQ": 37.65, + "QuoraRetrieval": 84.64, + "SCIDOCS": 10.39, + "SciFact": 66.36, + "TRECCOVID": 63.34, + "Touche2020": 12.82 } ] }, "STS": { "spearman": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "BIOSSES": 84.67, + "SICK-R": 72.16, + "STS12": 61.6, + "STS13": 79.71, + "STS14": 72.11, + "STS15": 82.18, + "STS16": 79.41, + "STS17 (en-en)": 85.44, + "STS22 (en)": 63.9, + "STSBenchmark": 77.44 } ] }, "Summarization": { "spearman": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised", + "SummEval": 31.45 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "text-search-babbage-001" + "Model": "LLM2Vec-Meta-Llama-3-unsupervised" } ] } }, - "all-mpnet-base-v2-instruct": { + "text-search-davinci-001": { "BitextMining": { "f1": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "Classification": { "accuracy": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "Clustering": { "v_measure": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "PairClassification": { "ap": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "Reranking": { "map": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "all-mpnet-base-v2-instruct", - "ARCChallenge": 10.35, - "AlphaNLI": 1.96, - "HellaSwag": 13.01, - "PIQA": 27.18, - "Quail": 3.02, - "RARbCode": 48.95, - "RARbMath": 69.21, - "SIQA": 1.29, - "SpartQA": 1.01, - "TempReasonL1": 1.52, - "TempReasonL2Fact": 7.28, - "TempReasonL2Pure": 1.03, - "TempReasonL3Fact": 7.03, - "TempReasonL3Pure": 5.16, - "WinoGrande": 9.66 + "Model": "text-search-davinci-001", + "ArguAna": 43.5, + "ClimateFEVER": 22.3, + "FEVER": 77.5, + "FiQA2018": 51.2, + "HotpotQA": 68.8, + "NFCorpus": 40.7, + "QuoraRetrieval": 63.8, + "SciFact": 75.4, + "TRECCOVID": 64.9, + "Touche2020": 29.1 } ] }, "STS": { "spearman": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "Summarization": { "spearman": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "all-mpnet-base-v2-instruct" + "Model": "text-search-davinci-001" } ] } }, - "bge-small-en-v1.5-instruct": { + 
"m3e-base": { "BitextMining": { "f1": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base" } ] }, "Classification": { "accuracy": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base", + "AmazonReviewsClassification (zh)": 43.02, + "IFlyTek": 44.42, + "JDReview": 85.33, + "MassiveIntentClassification (zh-CN)": 68.4, + "MassiveScenarioClassification (zh-CN)": 74.6, + "MultilingualSentiment": 71.9, + "OnlineShopping": 87.77, + "TNews": 48.28, + "Waimai": 83.99 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base", + "CLSClusteringP2P": 39.81, + "CLSClusteringS2S": 37.34, + "ThuNewsClusteringP2P": 59.77, + "ThuNewsClusteringS2S": 53.78 } ] }, "PairClassification": { "ap": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base", + "Cmnli": 69.98, + "Ocnli": 58.0 } ] }, "Reranking": { "map": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base", + "CMedQAv1": 77.05, + "CMedQAv2": 76.76, + "MMarcoReranking": 17.51, + "T2Reranking": 66.03 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-small-en-v1.5-instruct", - "ARCChallenge": 7.72, - "AlphaNLI": 1.26, - "HellaSwag": 23.41, - "PIQA": 20.79, - "Quail": 2.01, - "RARbCode": 41.52, - "RARbMath": 46.5, - "SIQA": 0.98, - "SpartQA": 2.86, - "TempReasonL1": 1.27, - "TempReasonL2Fact": 16.72, - "TempReasonL2Pure": 1.1, - "TempReasonL3Fact": 12.81, - "TempReasonL3Pure": 4.63, - "WinoGrande": 5.35 + "Model": "m3e-base", + "CmedqaRetrieval": 30.33, + "CovidRetrieval": 66.42, + "DuRetrieval": 75.76, + "EcomRetrieval": 50.27, + "MMarcoRetrieval": 65.46, + "MedicalRetrieval": 42.79, + "T2Retrieval": 73.14, + "VideoRetrieval": 51.11 } ] }, "STS": { "spearman": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base", + "AFQMC": 35.87, + "ATEC": 41.27, + "BQ": 63.81, + "LCQMC": 74.88, + "PAWSX": 12.19, + "QBQTC": 32.07, + "STS22 (zh)": 66.73, + "STSB": 76.97 } ] }, "Summarization": { "spearman": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "bge-small-en-v1.5-instruct" + "Model": "m3e-base" } ] } }, - "LaBSE-en-ru": { + "multilingual-e5-large": { "BitextMining": { "f1": [ { - "Model": "LaBSE-en-ru", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.62 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "LaBSE-en-ru", - "GeoreviewClassification (rus-Cyrl)": 40.89, - "HeadlineClassification (rus-Cyrl)": 68.75, - "InappropriatenessClassification (rus-Cyrl)": 58.48, - "KinopoiskClassification (rus-Cyrl)": 49.85, - "MassiveIntentClassification (swa-Latn)": 19.98, - "MassiveIntentClassification (aze-Latn)": 19.52, - "MassiveIntentClassification (tur-Latn)": 24.12, - "MassiveIntentClassification (cmo-Hans)": 3.96, - "MassiveIntentClassification (amh-Ethi)": 2.76, - "MassiveIntentClassification (kan-Knda)": 2.86, - "MassiveIntentClassification (hin-Deva)": 3.29, - "MassiveIntentClassification (tgl-Latn)": 27.08, - "MassiveIntentClassification (tha-Thai)": 4.0, - "MassiveIntentClassification (swe-Latn)": 32.01, - "MassiveIntentClassification (deu-Latn)": 35.14, - "MassiveIntentClassification (spa-Latn)": 37.67, - "MassiveIntentClassification (por-Latn)": 39.84, - "MassiveIntentClassification (jpn-Jpan)": 4.78, - "MassiveIntentClassification (fin-Latn)": 31.11, - "MassiveIntentClassification (kat-Geor)": 2.87, - "MassiveIntentClassification (slv-Latn)": 35.66, - "MassiveIntentClassification (rus-Cyrl)": 60.53, - "MassiveIntentClassification (ita-Latn)": 43.32, - 
"MassiveIntentClassification (tel-Telu)": 2.72, - "MassiveIntentClassification (afr-Latn)": 30.59, - "MassiveIntentClassification (isl-Latn)": 25.61, - "MassiveIntentClassification (fas-Arab)": 3.71, - "MassiveIntentClassification (vie-Latn)": 23.0, - "MassiveIntentClassification (ben-Beng)": 3.35, - "MassiveIntentClassification (hye-Armn)": 2.8, - "MassiveIntentClassification (pol-Latn)": 31.3, - "MassiveIntentClassification (cym-Latn)": 26.59, - "MassiveIntentClassification (jav-Latn)": 26.84, - "MassiveIntentClassification (mon-Cyrl)": 35.97, - "MassiveIntentClassification (en)": 60.48, - "MassiveIntentClassification (msa-Latn)": 27.82, - "MassiveIntentClassification (nob-Latn)": 35.78, - "MassiveIntentClassification (heb-Hebr)": 2.33, - "MassiveIntentClassification (khm-Khmr)": 4.6, - "MassiveIntentClassification (nld-Latn)": 34.66, - "MassiveIntentClassification (ind-Latn)": 33.31, - "MassiveIntentClassification (mal-Mlym)": 2.63, - "MassiveIntentClassification (tam-Taml)": 2.22, - "MassiveIntentClassification (mya-Mymr)": 3.57, - "MassiveIntentClassification (urd-Arab)": 3.36, - "MassiveIntentClassification (dan-Latn)": 38.66, - "MassiveIntentClassification (cmo-Hant)": 5.29, - "MassiveIntentClassification (ron-Latn)": 37.45, - "MassiveIntentClassification (lav-Latn)": 23.92, - "MassiveIntentClassification (fra-Latn)": 40.29, - "MassiveIntentClassification (ell-Grek)": 11.14, - "MassiveIntentClassification (sqi-Latn)": 35.84, - "MassiveIntentClassification (hun-Latn)": 26.74, - "MassiveIntentClassification (kor-Kore)": 2.69, - "MassiveIntentClassification (ara-Arab)": 5.19, - "MassiveScenarioClassification (swa-Latn)": 25.61, - "MassiveScenarioClassification (aze-Latn)": 24.48, - "MassiveScenarioClassification (tur-Latn)": 31.38, - "MassiveScenarioClassification (cmo-Hans)": 9.98, - "MassiveScenarioClassification (amh-Ethi)": 7.59, - "MassiveScenarioClassification (kan-Knda)": 8.73, - "MassiveScenarioClassification (hin-Deva)": 8.77, - "MassiveScenarioClassification (tgl-Latn)": 35.12, - "MassiveScenarioClassification (tha-Thai)": 8.69, - "MassiveScenarioClassification (swe-Latn)": 35.83, - "MassiveScenarioClassification (deu-Latn)": 41.72, - "MassiveScenarioClassification (spa-Latn)": 43.33, - "MassiveScenarioClassification (por-Latn)": 44.62, - "MassiveScenarioClassification (jpn-Jpan)": 9.51, - "MassiveScenarioClassification (fin-Latn)": 33.79, - "MassiveScenarioClassification (kat-Geor)": 7.32, - "MassiveScenarioClassification (slv-Latn)": 37.6, - "MassiveScenarioClassification (rus-Cyrl)": 65.15, - "MassiveScenarioClassification (ita-Latn)": 47.28, - "MassiveScenarioClassification (tel-Telu)": 7.53, - "MassiveScenarioClassification (afr-Latn)": 37.27, - "MassiveScenarioClassification (isl-Latn)": 30.32, - "MassiveScenarioClassification (fas-Arab)": 6.83, - "MassiveScenarioClassification (vie-Latn)": 28.92, - "MassiveScenarioClassification (ben-Beng)": 8.57, - "MassiveScenarioClassification (hye-Armn)": 8.91, - "MassiveScenarioClassification (pol-Latn)": 33.75, - "MassiveScenarioClassification (cym-Latn)": 30.38, - "MassiveScenarioClassification (jav-Latn)": 33.94, - "MassiveScenarioClassification (mon-Cyrl)": 41.53, - "MassiveScenarioClassification (en)": 65.43, - "MassiveScenarioClassification (msa-Latn)": 36.28, - "MassiveScenarioClassification (nob-Latn)": 42.43, - "MassiveScenarioClassification (heb-Hebr)": 8.64, - "MassiveScenarioClassification (khm-Khmr)": 9.99, - "MassiveScenarioClassification (nld-Latn)": 41.47, - "MassiveScenarioClassification (ind-Latn)": 39.05, - 
"MassiveScenarioClassification (mal-Mlym)": 7.24, - "MassiveScenarioClassification (tam-Taml)": 7.71, - "MassiveScenarioClassification (mya-Mymr)": 9.94, - "MassiveScenarioClassification (urd-Arab)": 9.16, - "MassiveScenarioClassification (dan-Latn)": 44.69, - "MassiveScenarioClassification (cmo-Hant)": 10.48, - "MassiveScenarioClassification (ron-Latn)": 44.55, - "MassiveScenarioClassification (lav-Latn)": 26.26, - "MassiveScenarioClassification (fra-Latn)": 45.08, - "MassiveScenarioClassification (ell-Grek)": 19.46, - "MassiveScenarioClassification (sqi-Latn)": 40.9, - "MassiveScenarioClassification (hun-Latn)": 33.92, - "MassiveScenarioClassification (kor-Kore)": 7.37, - "MassiveScenarioClassification (ara-Arab)": 12.43, - "RuReviewsClassification (rus-Cyrl)": 58.01, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 52.8, - "RuSciBenchOECDClassification (rus-Cyrl)": 40.36 + "Model": "multilingual-e5-large", + "BornholmBitextMining (dan-Latn)": 29.61, + "BornholmBitextMining": 44.16, + "Tatoeba (tgl-Latn_eng-Latn)": 92.0, + "Tatoeba (gsw-Latn_eng-Latn)": 51.65, + "Tatoeba (tzl-Latn_eng-Latn)": 53.16, + "Tatoeba (slv-Latn_eng-Latn)": 89.57, + "Tatoeba (jav-Latn_eng-Latn)": 75.46, + "Tatoeba (uig-Arab_eng-Latn)": 72.17, + "Tatoeba (ind-Latn_eng-Latn)": 92.9, + "Tatoeba (rus-Cyrl_eng-Latn)": 92.32, + "Tatoeba (war-Latn_eng-Latn)": 62.02, + "Tatoeba (mar-Deva_eng-Latn)": 88.58, + "Tatoeba (mkd-Cyrl_eng-Latn)": 85.63, + "Tatoeba (jpn-Jpan_eng-Latn)": 95.28, + "Tatoeba (hun-Latn_eng-Latn)": 94.01, + "Tatoeba (slk-Latn_eng-Latn)": 93.13, + "Tatoeba (tha-Thai_eng-Latn)": 95.38, + "Tatoeba (fra-Latn_eng-Latn)": 93.42, + "Tatoeba (ukr-Cyrl_eng-Latn)": 93.32, + "Tatoeba (kat-Geor_eng-Latn)": 84.09, + "Tatoeba (nov-Latn_eng-Latn)": 71.62, + "Tatoeba (kor-Hang_eng-Latn)": 90.65, + "Tatoeba (ben-Beng_eng-Latn)": 83.02, + "Tatoeba (cor-Latn_eng-Latn)": 6.28, + "Tatoeba (lfn-Latn_eng-Latn)": 62.91, + "Tatoeba (swh-Latn_eng-Latn)": 71.61, + "Tatoeba (tur-Latn_eng-Latn)": 96.27, + "Tatoeba (cbk-Latn_eng-Latn)": 69.26, + "Tatoeba (kur-Latn_eng-Latn)": 66.83, + "Tatoeba (arq-Arab_eng-Latn)": 41.56, + "Tatoeba (ceb-Latn_eng-Latn)": 55.31, + "Tatoeba (max-Deva_eng-Latn)": 63.41, + "Tatoeba (ang-Latn_eng-Latn)": 40.18, + "Tatoeba (nds-Latn_eng-Latn)": 69.28, + "Tatoeba (epo-Latn_eng-Latn)": 96.01, + "Tatoeba (heb-Hebr_eng-Latn)": 86.61, + "Tatoeba (yue-Hant_eng-Latn)": 88.71, + "Tatoeba (dan-Latn_eng-Latn)": 95.08, + "Tatoeba (swe-Latn_eng-Latn)": 95.3, + "Tatoeba (lvs-Latn_eng-Latn)": 90.06, + "Tatoeba (ast-Latn_eng-Latn)": 81.76, + "Tatoeba (dsb-Latn_eng-Latn)": 48.44, + "Tatoeba (pes-Arab_eng-Latn)": 92.14, + "Tatoeba (dtp-Latn_eng-Latn)": 7.03, + "Tatoeba (tuk-Latn_eng-Latn)": 33.15, + "Tatoeba (isl-Latn_eng-Latn)": 92.09, + "Tatoeba (khm-Khmr_eng-Latn)": 59.96, + "Tatoeba (pam-Latn_eng-Latn)": 9.32, + "Tatoeba (tat-Cyrl_eng-Latn)": 73.51, + "Tatoeba (bos-Latn_eng-Latn)": 92.86, + "Tatoeba (spa-Latn_eng-Latn)": 97.1, + "Tatoeba (kaz-Cyrl_eng-Latn)": 79.67, + "Tatoeba (bel-Cyrl_eng-Latn)": 91.08, + "Tatoeba (zsm-Latn_eng-Latn)": 94.53, + "Tatoeba (cat-Latn_eng-Latn)": 91.03, + "Tatoeba (urd-Arab_eng-Latn)": 89.21, + "Tatoeba (mon-Cyrl_eng-Latn)": 87.53, + "Tatoeba (tam-Taml_eng-Latn)": 88.23, + "Tatoeba (fry-Latn_eng-Latn)": 63.43, + "Tatoeba (nob-Latn_eng-Latn)": 97.2, + "Tatoeba (tel-Telu_eng-Latn)": 91.34, + "Tatoeba (hye-Armn_eng-Latn)": 90.92, + "Tatoeba (awa-Deva_eng-Latn)": 72.27, + "Tatoeba (hrv-Latn_eng-Latn)": 96.15, + "Tatoeba (ile-Latn_eng-Latn)": 79.16, + "Tatoeba (amh-Ethi_eng-Latn)": 80.69, + 
"Tatoeba (orv-Cyrl_eng-Latn)": 39.87, + "Tatoeba (ara-Arab_eng-Latn)": 85.48, + "Tatoeba (ido-Latn_eng-Latn)": 83.52, + "Tatoeba (hin-Deva_eng-Latn)": 94.48, + "Tatoeba (por-Latn_eng-Latn)": 93.63, + "Tatoeba (ron-Latn_eng-Latn)": 94.87, + "Tatoeba (swg-Latn_eng-Latn)": 55.64, + "Tatoeba (cmn-Hans_eng-Latn)": 95.28, + "Tatoeba (pol-Latn_eng-Latn)": 96.6, + "Tatoeba (bul-Cyrl_eng-Latn)": 92.93, + "Tatoeba (ina-Latn_eng-Latn)": 93.47, + "Tatoeba (bre-Latn_eng-Latn)": 11.1, + "Tatoeba (wuu-Hans_eng-Latn)": 86.37, + "Tatoeba (lit-Latn_eng-Latn)": 88.48, + "Tatoeba (csb-Latn_eng-Latn)": 36.98, + "Tatoeba (lat-Latn_eng-Latn)": 53.37, + "Tatoeba (gle-Latn_eng-Latn)": 71.48, + "Tatoeba (ita-Latn_eng-Latn)": 93.29, + "Tatoeba (srp-Cyrl_eng-Latn)": 93.1, + "Tatoeba (arz-Arab_eng-Latn)": 74.73, + "Tatoeba (cym-Latn_eng-Latn)": 76.21, + "Tatoeba (ber-Tfng_eng-Latn)": 38.9, + "Tatoeba (xho-Latn_eng-Latn)": 80.87, + "Tatoeba (uzb-Latn_eng-Latn)": 72.35, + "Tatoeba (pms-Latn_eng-Latn)": 59.85, + "Tatoeba (est-Latn_eng-Latn)": 85.03, + "Tatoeba (deu-Latn_eng-Latn)": 99.07, + "Tatoeba (yid-Hebr_eng-Latn)": 76.33, + "Tatoeba (ell-Grek_eng-Latn)": 93.88, + "Tatoeba (afr-Latn_eng-Latn)": 90.22, + "Tatoeba (fao-Latn_eng-Latn)": 72.62, + "Tatoeba (nld-Latn_eng-Latn)": 96.63, + "Tatoeba (hsb-Latn_eng-Latn)": 58.9, + "Tatoeba (aze-Latn_eng-Latn)": 87.61, + "Tatoeba (kzj-Latn_eng-Latn)": 7.91, + "Tatoeba (kab-Latn_eng-Latn)": 36.54, + "Tatoeba (mal-Mlym_eng-Latn)": 97.7, + "Tatoeba (mhr-Cyrl_eng-Latn)": 6.79, + "Tatoeba (ces-Latn_eng-Latn)": 94.89, + "Tatoeba (gla-Latn_eng-Latn)": 59.0, + "Tatoeba (cha-Latn_eng-Latn)": 27.16, + "Tatoeba (glg-Latn_eng-Latn)": 93.34, + "Tatoeba (vie-Latn_eng-Latn)": 97.0, + "Tatoeba (oci-Latn_eng-Latn)": 54.91, + "Tatoeba (nno-Latn_eng-Latn)": 91.4, + "Tatoeba (fin-Latn_eng-Latn)": 95.44, + "Tatoeba (eus-Latn_eng-Latn)": 77.82, + "Tatoeba (sqi-Latn_eng-Latn)": 94.7 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "multilingual-e5-large", + "AllegroReviews (pol-Latn)": 41.04, + "AllegroReviews": 41.14, + "AmazonCounterfactualClassification (en-ext)": 78.73, + "AmazonCounterfactualClassification (en)": 78.67, + "AmazonCounterfactualClassification (deu-Latn)": 68.66, + "AmazonCounterfactualClassification (jpn-Jpan)": 78.8, + "AmazonPolarityClassification": 93.26, + "AmazonReviewsClassification (en)": 49.2, + "AmazonReviewsClassification (deu-Latn)": 46.5, + "AmazonReviewsClassification (spa-Latn)": 44.35, + "AmazonReviewsClassification (fra-Latn)": 42.55, + "AmazonReviewsClassification (jpn-Jpan)": 41.71, + "AmazonReviewsClassification (cmn-Hans)": 38.87, + "AmazonReviewsClassification (fr)": 41.91, + "AngryTweetsClassification (dan-Latn)": 57.69, + "AngryTweetsClassification": 54.95, + "Banking77Classification": 75.88, + "CBD (pol-Latn)": 69.84, + "CBD": 69.9, + "DKHateClassification": 66.02, + "DanishPoliticalCommentsClassification (dan-Latn)": 39.43, + "DanishPoliticalCommentsClassification": 38.27, + "EmotionClassification": 47.58, + "GeoreviewClassification (rus-Cyrl)": 49.69, + "HeadlineClassification (rus-Cyrl)": 77.19, + "IFlyTek (cmn-Hans)": 41.86, + "IFlyTek": 45.47, + "ImdbClassification": 90.23, + "InappropriatenessClassification (rus-Cyrl)": 61.6, + "JDReview (cmn-Hans)": 80.54, + "JDReview": 80.99, + "KinopoiskClassification (rus-Cyrl)": 56.59, + "LccSentimentClassification (dan-Latn)": 61.53, + "LccSentimentClassification": 59.6, + "MTOPDomainClassification (en)": 91.81, + "MTOPDomainClassification (deu-Latn)": 90.44, + "MTOPDomainClassification 
(spa-Latn)": 88.34, + "MTOPDomainClassification (fra-Latn)": 86.23, + "MTOPDomainClassification (hin-Deva)": 86.84, + "MTOPDomainClassification (tha-Thai)": 86.88, + "MTOPDomainClassification (fr)": 86.41, + "MTOPIntentClassification (en)": 64.29, + "MTOPIntentClassification (deu-Latn)": 65.97, + "MTOPIntentClassification (spa-Latn)": 61.9, + "MTOPIntentClassification (fra-Latn)": 56.25, + "MTOPIntentClassification (hin-Deva)": 59.17, + "MTOPIntentClassification (tha-Thai)": 62.59, + "MTOPIntentClassification (fr)": 59.43, + "MasakhaNEWSClassification (amh-Ethi)": 83.7, + "MasakhaNEWSClassification (eng)": 78.26, + "MasakhaNEWSClassification (fra-Latn)": 76.11, + "MasakhaNEWSClassification (hau-Latn)": 76.17, + "MasakhaNEWSClassification (ibo-Latn)": 70.05, + "MasakhaNEWSClassification (lin-Latn)": 75.89, + "MasakhaNEWSClassification (lug-Latn)": 73.63, + "MasakhaNEWSClassification (orm-Ethi)": 80.31, + "MasakhaNEWSClassification (pcm-Latn)": 89.15, + "MasakhaNEWSClassification (run-Latn)": 76.55, + "MasakhaNEWSClassification (sna-Latn)": 86.99, + "MasakhaNEWSClassification (som-Latn)": 64.63, + "MasakhaNEWSClassification (swa-Latn)": 73.42, + "MasakhaNEWSClassification (tir-Ethi)": 72.06, + "MasakhaNEWSClassification (xho-Latn)": 82.56, + "MasakhaNEWSClassification (yor-Latn)": 81.09, + "MasakhaNEWSClassification (fra)": 79.38, + "MassiveIntentClassification (kor-Kore)": 63.92, + "MassiveIntentClassification (lav-Latn)": 58.31, + "MassiveIntentClassification (isl-Latn)": 53.3, + "MassiveIntentClassification (tel-Telu)": 53.96, + "MassiveIntentClassification (mya-Mymr)": 49.73, + "MassiveIntentClassification (nob-Latn)": 64.54, + "MassiveIntentClassification (en)": 68.51, + "MassiveIntentClassification (spa-Latn)": 64.01, + "MassiveIntentClassification (swe-Latn)": 66.52, + "MassiveIntentClassification (cmo-Hant)": 58.78, + "MassiveIntentClassification (pol-Latn)": 65.09, + "MassiveIntentClassification (rus-Cyrl)": 65.76, + "MassiveIntentClassification (aze-Latn)": 54.68, + "MassiveIntentClassification (fin-Latn)": 64.28, + "MassiveIntentClassification (cmo-Hans)": 66.23, + "MassiveIntentClassification (urd-Arab)": 54.6, + "MassiveIntentClassification (tam-Taml)": 53.41, + "MassiveIntentClassification (hin-Deva)": 60.93, + "MassiveIntentClassification (deu-Latn)": 63.82, + "MassiveIntentClassification (ell-Grek)": 64.34, + "MassiveIntentClassification (hye-Armn)": 50.89, + "MassiveIntentClassification (por-Latn)": 65.6, + "MassiveIntentClassification (nld-Latn)": 65.0, + "MassiveIntentClassification (fas-Arab)": 63.74, + "MassiveIntentClassification (ron-Latn)": 59.76, + "MassiveIntentClassification (slv-Latn)": 59.38, + "MassiveIntentClassification (heb-Hebr)": 62.44, + "MassiveIntentClassification (vie-Latn)": 63.39, + "MassiveIntentClassification (sqi-Latn)": 57.3, + "MassiveIntentClassification (khm-Khmr)": 34.88, + "MassiveIntentClassification (ben-Beng)": 55.6, + "MassiveIntentClassification (tgl-Latn)": 54.77, + "MassiveIntentClassification (jpn-Jpan)": 67.11, + "MassiveIntentClassification (kat-Geor)": 41.45, + "MassiveIntentClassification (afr-Latn)": 53.69, + "MassiveIntentClassification (cym-Latn)": 44.22, + "MassiveIntentClassification (amh-Ethi)": 45.48, + "MassiveIntentClassification (ita-Latn)": 63.89, + "MassiveIntentClassification (mal-Mlym)": 57.58, + "MassiveIntentClassification (tha-Thai)": 62.75, + "MassiveIntentClassification (ind-Latn)": 63.51, + "MassiveIntentClassification (jav-Latn)": 48.96, + "MassiveIntentClassification (dan-Latn)": 63.7, + 
"MassiveIntentClassification (ara-Arab)": 54.1, + "MassiveIntentClassification (kan-Knda)": 53.45, + "MassiveIntentClassification (hun-Latn)": 64.0, + "MassiveIntentClassification (tur-Latn)": 64.61, + "MassiveIntentClassification (msa-Latn)": 58.49, + "MassiveIntentClassification (mon-Cyrl)": 49.6, + "MassiveIntentClassification (swa-Latn)": 47.69, + "MassiveIntentClassification (fra-Latn)": 63.37, + "MassiveIntentClassification (da)": 60.16, + "MassiveIntentClassification (nb)": 59.83, + "MassiveIntentClassification (sv)": 61.78, + "MassiveIntentClassification (pl)": 65.07, + "MassiveScenarioClassification (heb-Hebr)": 67.72, + "MassiveScenarioClassification (vie-Latn)": 68.91, + "MassiveScenarioClassification (cmo-Hant)": 64.35, + "MassiveScenarioClassification (urd-Arab)": 60.89, + "MassiveScenarioClassification (isl-Latn)": 60.74, + "MassiveScenarioClassification (ell-Grek)": 69.74, + "MassiveScenarioClassification (mon-Cyrl)": 55.37, + "MassiveScenarioClassification (swa-Latn)": 56.27, + "MassiveScenarioClassification (tam-Taml)": 58.76, + "MassiveScenarioClassification (hye-Armn)": 55.76, + "MassiveScenarioClassification (amh-Ethi)": 52.69, + "MassiveScenarioClassification (ben-Beng)": 61.85, + "MassiveScenarioClassification (tel-Telu)": 59.49, + "MassiveScenarioClassification (dan-Latn)": 71.18, + "MassiveScenarioClassification (slv-Latn)": 65.33, + "MassiveScenarioClassification (en)": 73.04, + "MassiveScenarioClassification (rus-Cyrl)": 70.85, + "MassiveScenarioClassification (mal-Mlym)": 63.17, + "MassiveScenarioClassification (sqi-Latn)": 63.79, + "MassiveScenarioClassification (ita-Latn)": 69.45, + "MassiveScenarioClassification (kor-Kore)": 70.54, + "MassiveScenarioClassification (cmo-Hans)": 72.25, + "MassiveScenarioClassification (cym-Latn)": 51.25, + "MassiveScenarioClassification (pol-Latn)": 69.83, + "MassiveScenarioClassification (ind-Latn)": 69.43, + "MassiveScenarioClassification (tur-Latn)": 68.12, + "MassiveScenarioClassification (tgl-Latn)": 60.71, + "MassiveScenarioClassification (hin-Deva)": 66.85, + "MassiveScenarioClassification (spa-Latn)": 69.07, + "MassiveScenarioClassification (lav-Latn)": 64.28, + "MassiveScenarioClassification (mya-Mymr)": 54.03, + "MassiveScenarioClassification (ara-Arab)": 61.0, + "MassiveScenarioClassification (kan-Knda)": 59.36, + "MassiveScenarioClassification (jav-Latn)": 56.24, + "MassiveScenarioClassification (por-Latn)": 68.33, + "MassiveScenarioClassification (tha-Thai)": 69.06, + "MassiveScenarioClassification (aze-Latn)": 58.49, + "MassiveScenarioClassification (fra-Latn)": 68.74, + "MassiveScenarioClassification (ron-Latn)": 66.06, + "MassiveScenarioClassification (nld-Latn)": 71.11, + "MassiveScenarioClassification (fas-Arab)": 67.55, + "MassiveScenarioClassification (deu-Latn)": 71.25, + "MassiveScenarioClassification (nob-Latn)": 70.44, + "MassiveScenarioClassification (msa-Latn)": 63.55, + "MassiveScenarioClassification (afr-Latn)": 62.35, + "MassiveScenarioClassification (hun-Latn)": 70.53, + "MassiveScenarioClassification (swe-Latn)": 72.77, + "MassiveScenarioClassification (kat-Geor)": 47.82, + "MassiveScenarioClassification (jpn-Jpan)": 73.16, + "MassiveScenarioClassification (khm-Khmr)": 41.14, + "MassiveScenarioClassification (fin-Latn)": 68.62, + "MassiveScenarioClassification (da)": 67.46, + "MassiveScenarioClassification (nb)": 66.18, + "MassiveScenarioClassification (sv)": 69.15, + "MassiveScenarioClassification (pl)": 69.82, + "MultilingualSentiment (cmn-Hans)": 70.81, + "MultilingualSentiment": 68.58, + 
"NoRecClassification (nob-Latn)": 58.43, + "NoRecClassification": 62.76, + "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 80.15, + "NordicLangClassification": 82.29, + "NorwegianParliament": 60.36, + "OnlineShopping (cmn-Hans)": 90.45, + "OnlineShopping": 90.81, + "PAC (pol-Latn)": 70.33, + "PAC": 70.37, + "PolEmo2.0-IN (pol-Latn)": 77.06, + "PolEmo2.0-IN": 77.06, + "PolEmo2.0-OUT (pol-Latn)": 53.48, + "PolEmo2.0-OUT": 53.38, + "RuReviewsClassification (rus-Cyrl)": 65.28, + "RuSciBenchGRNTIClassification (rus-Cyrl)": 58.2, + "RuSciBenchOECDClassification (rus-Cyrl)": 43.91, + "ScalaDaClassification": 50.77, + "ScalaNbClassification": 50.44, + "TNews (cmn-Hans)": 48.8, + "TNews": 48.38, + "ToxicConversationsClassification": 66.01, + "TweetSentimentExtractionClassification": 62.8, + "Waimai (cmn-Hans)": 86.3, + "Waimai": 85.02 } ] }, "Clustering": { "v_measure": [ { - "Model": "LaBSE-en-ru", - "GeoreviewClusteringP2P (rus-Cyrl)": 51.89, - "MLSUMClusteringP2P (rus-Cyrl)": 37.87, - "MLSUMClusteringS2S (rus-Cyrl)": 41.24, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 47.48, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 41.16 + "Model": "multilingual-e5-large", + "8TagsClustering": 33.88, + "AlloProfClusteringP2P": 62.99, + "AlloProfClusteringS2S": 32.26, + "BiorxivClusteringP2P": 35.5, + "BiorxivClusteringS2S": 33.3, + "CLSClusteringP2P": 40.68, + "CLSClusteringS2S": 38.59, + "GeoreviewClusteringP2P (rus-Cyrl)": 60.51, + "HALClusteringS2S": 22.44, + "MLSUMClusteringP2P (rus-Cyrl)": 42.79, + "MLSUMClusteringP2P": 44.04, + "MLSUMClusteringS2S (rus-Cyrl)": 44.32, + "MLSUMClusteringS2S": 37.65, + "MasakhaNEWSClusteringP2P (amh-Ethi)": 67.16, + "MasakhaNEWSClusteringP2P (eng)": 61.1, + "MasakhaNEWSClusteringP2P (fra-Latn)": 41.66, + "MasakhaNEWSClusteringP2P (hau-Latn)": 60.7, + "MasakhaNEWSClusteringP2P (ibo-Latn)": 48.41, + "MasakhaNEWSClusteringP2P (lin-Latn)": 57.69, + "MasakhaNEWSClusteringP2P (lug-Latn)": 71.95, + "MasakhaNEWSClusteringP2P (orm-Ethi)": 60.14, + "MasakhaNEWSClusteringP2P (pcm-Latn)": 80.84, + "MasakhaNEWSClusteringP2P (run-Latn)": 59.91, + "MasakhaNEWSClusteringP2P (sna-Latn)": 53.3, + "MasakhaNEWSClusteringP2P (som-Latn)": 34.38, + "MasakhaNEWSClusteringP2P (swa-Latn)": 33.25, + "MasakhaNEWSClusteringP2P (tir-Ethi)": 54.21, + "MasakhaNEWSClusteringP2P (xho-Latn)": 41.12, + "MasakhaNEWSClusteringP2P (yor-Latn)": 36.22, + "MasakhaNEWSClusteringP2P (fra)": 40.94, + "MasakhaNEWSClusteringS2S (amh-Ethi)": 47.24, + "MasakhaNEWSClusteringS2S (eng)": 53.93, + "MasakhaNEWSClusteringS2S (fra-Latn)": 39.84, + "MasakhaNEWSClusteringS2S (hau-Latn)": 19.24, + "MasakhaNEWSClusteringS2S (ibo-Latn)": 28.88, + "MasakhaNEWSClusteringS2S (lin-Latn)": 42.22, + "MasakhaNEWSClusteringS2S (lug-Latn)": 43.63, + "MasakhaNEWSClusteringS2S (orm-Ethi)": 26.29, + "MasakhaNEWSClusteringS2S (pcm-Latn)": 59.77, + "MasakhaNEWSClusteringS2S (run-Latn)": 51.46, + "MasakhaNEWSClusteringS2S (sna-Latn)": 48.14, + "MasakhaNEWSClusteringS2S (som-Latn)": 25.14, + "MasakhaNEWSClusteringS2S (swa-Latn)": 7.28, + "MasakhaNEWSClusteringS2S (tir-Ethi)": 50.51, + "MasakhaNEWSClusteringS2S (xho-Latn)": 30.98, + "MasakhaNEWSClusteringS2S (yor-Latn)": 34.09, + "MasakhaNEWSClusteringS2S (fra)": 30.56, + "MedrxivClusteringP2P": 31.7, + "MedrxivClusteringS2S": 29.76, + "RedditClustering": 46.91, + "RedditClusteringP2P": 63.0, + "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 52.03, + "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 45.11, + "StackExchangeClustering": 58.37, + "StackExchangeClusteringP2P": 
32.9, + "ThuNewsClusteringP2P": 58.05, + "ThuNewsClusteringS2S": 55.59, + "TwentyNewsgroupsClustering": 39.4 } ] }, "PairClassification": { "ap": [ { - "Model": "LaBSE-en-ru", - "OpusparcusPC (rus-Cyrl)": 87.18, - "TERRa (rus-Cyrl)": 55.61 + "Model": "multilingual-e5-large", + "CDSC-E (pol-Latn)": 74.47, + "CDSC-E": 74.47, + "Cmnli": 78.18, + "Ocnli": 61.6, + "OpusparcusPC (deu-Latn)": 97.27, + "OpusparcusPC (en)": 98.74, + "OpusparcusPC (fin-Latn)": 94.26, + "OpusparcusPC (fra-Latn)": 93.68, + "OpusparcusPC (rus-Cyrl)": 89.64, + "OpusparcusPC (swe-Latn)": 94.98, + "OpusparcusPC (fr)": 93.89, + "PPC": 92.18, + "PSC (pol-Latn)": 99.4, + "PSC": 99.39, + "PawsXPairClassification (deu-Latn)": 56.81, + "PawsXPairClassification (en)": 62.97, + "PawsXPairClassification (spa-Latn)": 56.85, + "PawsXPairClassification (fra-Latn)": 58.68, + "PawsXPairClassification (jpn-Hira)": 50.7, + "PawsXPairClassification (kor-Hang)": 52.08, + "PawsXPairClassification (cmn-Hans)": 56.82, + "PawsXPairClassification (fr)": 58.5, + "SICK-E-PL (pol-Latn)": 75.95, + "SICK-E-PL": 75.96, + "SprintDuplicateQuestions": 93.14, + "TERRa (rus-Cyrl)": 58.4, + "TwitterSemEval2015": 75.28, + "TwitterURLCorpus": 85.83 } ] }, "Reranking": { "map": [ { - "Model": "LaBSE-en-ru", - "RuBQReranking (rus-Cyrl)": 54.83 + "Model": "multilingual-e5-large", + "AlloprofReranking (fra-Latn)": 69.44, + "AlloprofReranking": 57.37, + "AskUbuntuDupQuestions": 59.24, + "CMedQAv1": 68.25, + "CMedQAv2": 68.56, + "MMarcoReranking (cmn-Hans)": 29.12, + "MMarcoReranking": 21.34, + "MindSmallReranking": 30.24, + "RuBQReranking (rus-Cyrl)": 75.58, + "SciDocsRR": 84.22, + "StackOverflowDupQuestions": 50.14, + "SyntecReranking (fra-Latn)": 85.45, + "SyntecReranking": 86.9, + "T2Reranking (cmn-Hans)": 66.32, + "T2Reranking": 65.83 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LaBSE-en-ru", - "RiaNewsRetrieval (rus-Cyrl)": 34.73, - "RuBQRetrieval (rus-Cyrl)": 29.03 + "Model": "multilingual-e5-large", + "AILACasedocs": 26.43, + "AILAStatutes": 20.84, + "ARCChallenge": 10.83, + "AlloprofRetrieval (fra-Latn)": 39.34, + "AlloprofRetrieval": 38.15, + "AlphaNLI": 13.59, + "ArguAna": 54.36, + "ArguAna-PL (pol-Latn)": 52.99, + "ArguAna-PL": 53.02, + "BSARDRetrieval (fra-Latn)": 21.28, + "BSARDRetrieval": 0.27, + "CmedqaRetrieval (cmn-Hans)": 28.66, + "CmedqaRetrieval": 28.67, + "CovidRetrieval (cmn-Hans)": 75.61, + "CovidRetrieval": 75.51, + "DBPedia-PL": 35.82, + "DuRetrieval (cmn-Hans)": 85.3, + "DuRetrieval": 85.32, + "EcomRetrieval (cmn-Hans)": 54.67, + "EcomRetrieval": 54.75, + "FiQA-PL (pol-Latn)": 32.97, + "FiQA-PL": 33.0, + "FiQA2018": 43.81, + "GerDaLIRSmall (deu-Latn)": 15.72, + "HellaSwag": 27.35, + "HotpotQA-PL": 67.41, + "LEMBNarrativeQARetrieval": 24.22, + "LEMBNeedleRetrieval": 28.0, + "LEMBPasskeyRetrieval": 38.25, + "LEMBQMSumRetrieval": 24.26, + "LEMBSummScreenFDRetrieval": 71.12, + "LEMBWikimQARetrieval": 56.8, + "LeCaRDv2 (zho-Hans)": 55.83, + "LegalBenchConsumerContractsQA": 73.3, + "LegalBenchCorporateLobbying": 89.72, + "LegalQuAD (deu-Latn)": 43.17, + "LegalSummarization": 62.1, + "MMarcoRetrieval (cmn-Hans)": 79.2, + "MMarcoRetrieval": 79.2, + "MSMARCO-PL": 33.38, + "MedicalRetrieval (cmn-Hans)": 51.44, + "MedicalRetrieval": 51.44, + "MintakaRetrieval (ara-Arab)": 26.5, + "MintakaRetrieval (deu-Latn)": 32.77, + "MintakaRetrieval (spa-Latn)": 34.23, + "MintakaRetrieval (fra-Latn)": 34.24, + "MintakaRetrieval (hin-Deva)": 27.45, + "MintakaRetrieval (ita-Latn)": 33.84, + "MintakaRetrieval (jpn-Hira)": 26.45, + "MintakaRetrieval 
(por-Latn)": 35.9, + "MintakaRetrieval (fr)": 25.2, + "NFCorpus": 33.95, + "NFCorpus-PL (pol-Latn)": 30.21, + "NFCorpus-PL": 30.24, + "NQ-PL": 52.79, + "PIQA": 28.82, + "Quail": 4.85, + "Quora-PL": 83.65, + "RARbCode": 58.92, + "RARbMath": 67.32, + "RiaNewsRetrieval (rus-Cyrl)": 80.67, + "RuBQRetrieval (rus-Cyrl)": 74.11, + "SCIDOCS": 17.45, + "SCIDOCS-PL (pol-Latn)": 13.82, + "SCIDOCS-PL": 13.81, + "SIQA": 5.36, + "SciFact": 70.42, + "SciFact-PL (pol-Latn)": 65.66, + "SciFact-PL": 65.66, + "SpartQA": 5.64, + "SyntecRetrieval (fra-Latn)": 82.39, + "SyntecRetrieval": 81.07, + "T2Retrieval (cmn-Hans)": 76.07, + "T2Retrieval": 76.11, + "TRECCOVID": 71.21, + "TRECCOVID-PL (pol-Latn)": 69.9, + "TRECCOVID-PL": 70.03, + "TempReasonL1": 1.14, + "TempReasonL2Fact": 42.97, + "TempReasonL2Pure": 2.05, + "TempReasonL3Fact": 38.22, + "TempReasonL3Pure": 8.31, + "Touche2020": 23.13, + "VideoRetrieval (cmn-Hans)": 58.28, + "VideoRetrieval": 58.25, + "WinoGrande": 54.99, + "XPQARetrieval (ara-Arab_ara-Arab)": 43.69, + "XPQARetrieval (eng-Latn_ara-Arab)": 30.86, + "XPQARetrieval (ara-Arab_eng-Latn)": 39.11, + "XPQARetrieval (deu-Latn_deu-Latn)": 76.83, + "XPQARetrieval (eng-Latn_deu-Latn)": 42.87, + "XPQARetrieval (deu-Latn_eng-Latn)": 68.25, + "XPQARetrieval (spa-Latn_spa-Latn)": 61.77, + "XPQARetrieval (eng-Latn_spa-Latn)": 37.55, + "XPQARetrieval (spa-Latn_eng-Latn)": 52.86, + "XPQARetrieval (fra-Latn_fra-Latn)": 61.38, + "XPQARetrieval (eng-Latn_fra-Latn)": 39.12, + "XPQARetrieval (fra-Latn_eng-Latn)": 57.93, + "XPQARetrieval (hin-Deva_hin-Deva)": 71.09, + "XPQARetrieval (eng-Latn_hin-Deva)": 32.39, + "XPQARetrieval (hin-Deva_eng-Latn)": 68.31, + "XPQARetrieval (ita-Latn_ita-Latn)": 74.32, + "XPQARetrieval (eng-Latn_ita-Latn)": 37.95, + "XPQARetrieval (ita-Latn_eng-Latn)": 64.54, + "XPQARetrieval (jpn-Hira_jpn-Hira)": 74.11, + "XPQARetrieval (eng-Latn_jpn-Hira)": 38.31, + "XPQARetrieval (jpn-Hira_eng-Latn)": 65.42, + "XPQARetrieval (kor-Hang_kor-Hang)": 35.72, + "XPQARetrieval (eng-Latn_kor-Hang)": 31.09, + "XPQARetrieval (kor-Hang_eng-Latn)": 34.06, + "XPQARetrieval (pol-Latn_pol-Latn)": 51.01, + "XPQARetrieval (eng-Latn_pol-Latn)": 30.49, + "XPQARetrieval (pol-Latn_eng-Latn)": 44.66, + "XPQARetrieval (por-Latn_por-Latn)": 41.1, + "XPQARetrieval (eng-Latn_por-Latn)": 22.03, + "XPQARetrieval (por-Latn_eng-Latn)": 35.15, + "XPQARetrieval (tam-Taml_tam-Taml)": 39.51, + "XPQARetrieval (eng-Latn_tam-Taml)": 17.33, + "XPQARetrieval (tam-Taml_eng-Latn)": 33.67, + "XPQARetrieval (cmn-Hans_cmn-Hans)": 66.27, + "XPQARetrieval (eng-Latn_cmn-Hans)": 26.24, + "XPQARetrieval (cmn-Hans_eng-Latn)": 55.15, + "XPQARetrieval (fr)": 66.15 } ] }, "STS": { "spearman": [ { - "Model": "LaBSE-en-ru", - "RUParaPhraserSTS (rus-Cyrl)": 65.87, - "RuSTSBenchmarkSTS (rus-Cyrl)": 73.32, - "STS22 (deu-Latn)": 38.9, - "STS22 (en)": 59.47, - "STS22 (pol-Latn_eng-Latn)": 58.73, - "STS22 (spa-Latn)": 60.85, - "STS22 (fra-Latn)": 74.98, - "STS22 (deu-Latn_eng-Latn)": 47.98, - "STS22 (deu-Latn_fra-Latn)": 59.4, - "STS22 (deu-Latn_pol-Latn)": 39.48, - "STS22 (pol-Latn)": 32.74, - "STS22 (tur-Latn)": 55.04, - "STS22 (spa-Latn_eng-Latn)": 70.8, - "STS22 (rus-Cyrl)": 58.53, - "STS22 (ita-Latn)": 68.58, - "STS22 (fra-Latn_pol-Latn)": 61.98, - "STS22 (spa-Latn_ita-Latn)": 66.83, - "STS22 (cmn-Hans_eng-Latn)": 24.98, - "STS22 (ara-Arab)": 31.85, - "STS22 (cmn-Hans)": 35.1, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 73.02 + "Model": "multilingual-e5-large", + "AFQMC (cmn-Hans)": 33.01, + "AFQMC": 33.02, + "ATEC (cmn-Hans)": 39.8, + "ATEC": 39.81, 
+ "BIOSSES": 82.49, + "BQ (cmn-Hans)": 46.44, + "BQ": 46.44, + "CDSC-R (pol-Latn)": 91.0, + "CDSC-R": 91.0, + "LCQMC (cmn-Hans)": 75.95, + "LCQMC": 75.95, + "PAWSX (cmn-Hans)": 14.63, + "PAWSX": 14.63, + "QBQTC": 29.77, + "RUParaPhraserSTS (rus-Cyrl)": 71.82, + "RuSTSBenchmarkSTS (rus-Cyrl)": 83.15, + "SICK-R": 80.23, + "SICK-R-PL (pol-Latn)": 75.08, + "SICK-R-PL": 75.08, + "SICKFr (fra-Latn)": 78.81, + "SICKFr": 78.78, + "STS12": 80.02, + "STS13": 81.55, + "STS14": 77.72, + "STS15": 89.31, + "STS16": 85.79, + "STS17 (en-en)": 88.12, + "STS17 (spa-Latn)": 86.71, + "STS17 (spa-Latn_eng-Latn)": 80.74, + "STS17 (eng-Latn_ara-Arab)": 75.03, + "STS17 (fra-Latn_eng-Latn)": 85.62, + "STS17 (kor-Hang)": 82.27, + "STS17 (ita-Latn_eng-Latn)": 84.52, + "STS17 (ara-Arab)": 77.83, + "STS17 (eng-Latn_tur-Latn)": 71.22, + "STS17 (eng-Latn_deu-Latn)": 86.15, + "STS17 (nld-Latn_eng-Latn)": 85.29, + "STS22 (spa-Latn)": 64.6, + "STS22 (spa-Latn_eng-Latn)": 72.51, + "STS22 (deu-Latn_eng-Latn)": 56.59, + "STS22 (cmn-Hans_eng-Latn)": 65.95, + "STS22 (deu-Latn_pol-Latn)": 49.58, + "STS22 (fra-Latn_pol-Latn)": 50.71, + "STS22 (en)": 63.66, + "STS22 (ara-Arab)": 56.95, + "STS22 (spa-Latn_ita-Latn)": 68.92, + "STS22 (tur-Latn)": 63.56, + "STS22 (deu-Latn_fra-Latn)": 67.96, + "STS22 (ita-Latn)": 76.99, + "STS22 (cmn-Hans)": 66.82, + "STS22 (rus-Cyrl)": 59.89, + "STS22 (fra-Latn)": 76.77, + "STS22 (pol-Latn_eng-Latn)": 65.54, + "STS22 (deu-Latn)": 56.58, + "STS22 (pol-Latn)": 34.65, + "STS22 (zh)": 65.64, + "STS22 (pl)": 34.66, + "STSB (cmn-Hans)": 81.08, + "STSB": 81.08, + "STSBenchmark": 87.29, + "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.22, + "STSBenchmarkMultilingualSTS (en)": 87.29, + "STSBenchmarkMultilingualSTS (pol-Latn)": 81.06, + "STSBenchmarkMultilingualSTS (nld-Latn)": 81.63, + "STSBenchmarkMultilingualSTS (ita-Latn)": 81.75, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 83.05, + "STSBenchmarkMultilingualSTS (por-Latn)": 73.31, + "STSBenchmarkMultilingualSTS (spa-Latn)": 83.81, + "STSBenchmarkMultilingualSTS (fra-Latn)": 83.28, + "STSBenchmarkMultilingualSTS (deu-Latn)": 84.27, + "STSBenchmarkMultilingualSTS (fr)": 82.53 } ] }, "Summarization": { "spearman": [ { - "Model": "LaBSE-en-ru" + "Model": "multilingual-e5-large", + "SummEval": 29.65, + "SummEvalFr (fra-Latn)": 30.92, + "SummEvalFr": 30.92 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "LaBSE-en-ru" + "Model": "multilingual-e5-large" } ] } }, - "text2vec-base-chinese": { + "komninos": { "BitextMining": { "f1": [ { - "Model": "text2vec-base-chinese" + "Model": "komninos", + "BUCC (de-en)": 0.18, + "BUCC (fr-en)": 0.08, + "BUCC (ru-en)": 0.15, + "BUCC (zh-en)": 0.05, + "Tatoeba (afr-eng)": 4.82, + "Tatoeba (amh-eng)": 1.18, + "Tatoeba (ang-eng)": 8.54, + "Tatoeba (ara-eng)": 0.63, + "Tatoeba (arq-eng)": 0.4, + "Tatoeba (arz-eng)": 0.63, + "Tatoeba (ast-eng)": 11.69, + "Tatoeba (awa-eng)": 0.0, + "Tatoeba (aze-eng)": 3.22, + "Tatoeba (bel-eng)": 1.75, + "Tatoeba (ben-eng)": 0.2, + "Tatoeba (ber-eng)": 7.0, + "Tatoeba (bos-eng)": 9.31, + "Tatoeba (bre-eng)": 4.17, + "Tatoeba (bul-eng)": 1.29, + "Tatoeba (cat-eng)": 7.73, + "Tatoeba (cbk-eng)": 5.61, + "Tatoeba (ceb-eng)": 4.88, + "Tatoeba (ces-eng)": 3.55, + "Tatoeba (cha-eng)": 19.29, + "Tatoeba (cmn-eng)": 0.5, + "Tatoeba (cor-eng)": 4.15, + "Tatoeba (csb-eng)": 5.69, + "Tatoeba (cym-eng)": 8.4, + "Tatoeba (dan-eng)": 6.99, + "Tatoeba (deu-eng)": 3.67, + "Tatoeba (dsb-eng)": 5.33, + "Tatoeba (dtp-eng)": 4.25, + "Tatoeba (ell-eng)": 0.63, + "Tatoeba (epo-eng)": 2.45, + "Tatoeba (est-eng)": 
2.69, + "Tatoeba (eus-eng)": 4.69, + "Tatoeba (fao-eng)": 7.61, + "Tatoeba (fin-eng)": 3.36, + "Tatoeba (fra-eng)": 7.0, + "Tatoeba (fry-eng)": 12.36, + "Tatoeba (gla-eng)": 3.07, + "Tatoeba (gle-eng)": 4.81, + "Tatoeba (glg-eng)": 8.12, + "Tatoeba (gsw-eng)": 18.87, + "Tatoeba (heb-eng)": 0.68, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (hrv-eng)": 5.41, + "Tatoeba (hsb-eng)": 6.32, + "Tatoeba (hun-eng)": 3.42, + "Tatoeba (hye-eng)": 0.97, + "Tatoeba (ido-eng)": 7.1, + "Tatoeba (ile-eng)": 13.61, + "Tatoeba (ina-eng)": 8.57, + "Tatoeba (ind-eng)": 7.26, + "Tatoeba (isl-eng)": 4.09, + "Tatoeba (ita-eng)": 5.54, + "Tatoeba (jav-eng)": 11.43, + "Tatoeba (jpn-eng)": 0.2, + "Tatoeba (kab-eng)": 2.71, + "Tatoeba (kat-eng)": 1.11, + "Tatoeba (kaz-eng)": 1.17, + "Tatoeba (khm-eng)": 0.55, + "Tatoeba (kor-eng)": 0.5, + "Tatoeba (kur-eng)": 8.55, + "Tatoeba (kzj-eng)": 4.61, + "Tatoeba (lat-eng)": 4.07, + "Tatoeba (lfn-eng)": 2.83, + "Tatoeba (lit-eng)": 0.95, + "Tatoeba (lvs-eng)": 3.25, + "Tatoeba (mal-eng)": 0.29, + "Tatoeba (mar-eng)": 0.2, + "Tatoeba (max-eng)": 14.53, + "Tatoeba (mhr-eng)": 0.2, + "Tatoeba (mkd-eng)": 0.2, + "Tatoeba (mon-eng)": 1.1, + "Tatoeba (nds-eng)": 10.37, + "Tatoeba (nld-eng)": 9.5, + "Tatoeba (nno-eng)": 4.49, + "Tatoeba (nob-eng)": 4.95, + "Tatoeba (nov-eng)": 14.53, + "Tatoeba (oci-eng)": 5.8, + "Tatoeba (orv-eng)": 0.24, + "Tatoeba (pam-eng)": 6.65, + "Tatoeba (pes-eng)": 0.5, + "Tatoeba (pms-eng)": 8.05, + "Tatoeba (pol-eng)": 5.13, + "Tatoeba (por-eng)": 5.87, + "Tatoeba (ron-eng)": 6.76, + "Tatoeba (rus-eng)": 0.2, + "Tatoeba (slk-eng)": 4.23, + "Tatoeba (slv-eng)": 6.05, + "Tatoeba (spa-eng)": 5.03, + "Tatoeba (sqi-eng)": 4.36, + "Tatoeba (srp-eng)": 1.77, + "Tatoeba (swe-eng)": 6.72, + "Tatoeba (swg-eng)": 8.54, + "Tatoeba (swh-eng)": 11.49, + "Tatoeba (tam-eng)": 1.3, + "Tatoeba (tat-eng)": 0.77, + "Tatoeba (tel-eng)": 0.85, + "Tatoeba (tgl-eng)": 2.61, + "Tatoeba (tha-eng)": 0.69, + "Tatoeba (tuk-eng)": 5.76, + "Tatoeba (tur-eng)": 5.24, + "Tatoeba (tzl-eng)": 15.51, + "Tatoeba (uig-eng)": 0.6, + "Tatoeba (ukr-eng)": 1.23, + "Tatoeba (urd-eng)": 0.4, + "Tatoeba (uzb-eng)": 4.73, + "Tatoeba (vie-eng)": 6.55, + "Tatoeba (war-eng)": 4.12, + "Tatoeba (wuu-eng)": 0.2, + "Tatoeba (xho-eng)": 4.33, + "Tatoeba (yid-eng)": 0.59, + "Tatoeba (yue-eng)": 0.5, + "Tatoeba (zsm-eng)": 7.27 } ] }, "Classification": { "accuracy": [ { - "Model": "text2vec-base-chinese", - "AmazonReviewsClassification (zh)": 34.12, - "IFlyTek": 42.05, - "JDReview": 82.14, - "MassiveIntentClassification (zh-CN)": 63.98, - "MassiveScenarioClassification (zh-CN)": 70.52, - "MultilingualSentiment": 60.98, - "OnlineShopping": 85.69, - "TNews": 43.01, - "Waimai": 77.22 + "Model": "komninos", + "AmazonCounterfactualClassification (en)": 60.54, + "AmazonPolarityClassification": 59.59, + "AmazonReviewsClassification (en)": 31.01, + "Banking77Classification": 67.05, + "EmotionClassification": 33.18, + "ImdbClassification": 63.98, + "MTOPDomainClassification (en)": 78.57, + "MTOPIntentClassification (en)": 57.07, + "MassiveIntentClassification (en)": 57.21, + "MassiveScenarioClassification (en)": 66.11, + "ToxicConversationsClassification": 67.76, + "TweetSentimentExtractionClassification": 49.68 } ] }, "Clustering": { "v_measure": [ { - "Model": "text2vec-base-chinese", - "CLSClusteringP2P": 35.27, - "CLSClusteringS2S": 32.42, - "ThuNewsClusteringP2P": 42.92, - "ThuNewsClusteringS2S": 40.01 + "Model": "komninos", + "ArxivClusteringP2P": 34.73, + "ArxivClusteringS2S": 26.01, + "BiorxivClusteringP2P": 29.76, 
+ "BiorxivClusteringS2S": 20.71, + "BlurbsClusteringP2P": 11.37, + "BlurbsClusteringS2S": 8.01, + "MedrxivClusteringP2P": 26.65, + "MedrxivClusteringS2S": 21.5, + "RedditClustering": 28.84, + "RedditClusteringP2P": 7.37, + "StackExchangeClustering": 39.04, + "StackExchangeClusteringP2P": 30.23, + "TenKGnadClusteringP2P": 15.89, + "TenKGnadClusteringS2S": 4.84, + "TwentyNewsgroupsClustering": 27.42 } ] }, "PairClassification": { "ap": [ { - "Model": "text2vec-base-chinese", - "Cmnli": 73.87, - "Ocnli": 60.95 + "Model": "komninos", + "SprintDuplicateQuestions": 85.55, + "TwitterSemEval2015": 53.85, + "TwitterURLCorpus": 79.41 } ] }, "Reranking": { "map": [ { - "Model": "text2vec-base-chinese", - "CMedQAv1": 59.26, - "CMedQAv2": 59.82, - "MMarcoReranking": 12.76, - "T2Reranking": 65.95 + "Model": "komninos", + "AskUbuntuDupQuestions": 50.88, + "MindSmallReranking": 28.92, + "SciDocsRR": 63.55, + "StackOverflowDupQuestions": 35.65 } ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "text2vec-base-chinese", - "CmedqaRetrieval": 15.91, - "CovidRetrieval": 44.81, - "DuRetrieval": 52.23, - "EcomRetrieval": 34.6, - "MMarcoRetrieval": 44.06, - "MedicalRetrieval": 27.56, - "T2Retrieval": 51.67, - "VideoRetrieval": 39.52 + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "komninos", + "ArguAna": 30.96, + "CQADupstackRetrieval": 16.79, + "ClimateFEVER": 14.87, + "DBPedia": 15.88, + "FEVER": 15.56, + "FiQA2018": 10.49, + "HotpotQA": 20.77, + "MSMARCO": 9.75, + "NFCorpus": 11.79, + "NQ": 12.75, + "QuoraRetrieval": 71.57, + "SCIDOCS": 8.47, + "SciFact": 29.53, + "TRECCOVID": 35.92, + "Touche2020": 13.17 } ] }, "STS": { "spearman": [ { - "Model": "text2vec-base-chinese", - "AFQMC": 26.06, - "ATEC": 31.93, - "BQ": 42.67, - "LCQMC": 70.16, - "PAWSX": 17.21, - "QBQTC": 24.62, - "STS22 (zh)": 55.35, - "STSB": 79.3 + "Model": "komninos", + "BIOSSES": 50.25, + "SICK-R": 55.49, + "STS12": 53.51, + "STS13": 70.8, + "STS14": 63.56, + "STS15": 74.08, + "STS16": 64.6, + "STS17 (ar-ar)": 13.78, + "STS17 (en-ar)": 9.08, + "STS17 (en-de)": -3.11, + "STS17 (en-en)": 76.91, + "STS17 (en-tr)": -0.45, + "STS17 (es-en)": -8.18, + "STS17 (es-es)": 48.23, + "STS17 (fr-en)": 5.81, + "STS17 (it-en)": 3.64, + "STS17 (ko-ko)": 2.54, + "STS17 (nl-en)": 0.44, + "STS22 (ar)": 32.42, + "STS22 (de)": 33.04, + "STS22 (de-en)": 28.65, + "STS22 (de-fr)": 14.77, + "STS22 (de-pl)": 11.21, + "STS22 (en)": 53.89, + "STS22 (es)": 48.53, + "STS22 (es-en)": 26.97, + "STS22 (es-it)": 41.1, + "STS22 (fr)": 49.43, + "STS22 (fr-pl)": 39.44, + "STS22 (it)": 57.77, + "STS22 (pl)": 12.47, + "STS22 (pl-en)": 45.55, + "STS22 (ru)": 19.44, + "STS22 (tr)": 47.38, + "STS22 (zh)": 4.78, + "STS22 (zh-en)": 14.05, + "STSBenchmark": 61.55 } ] }, "Summarization": { "spearman": [ { - "Model": "text2vec-base-chinese" + "Model": "komninos", + "SummEval": 30.49 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "text2vec-base-chinese" + "Model": "komninos" } ] } }, - "herbert-base-retrieval-v2": { + "voyage-lite-01-instruct": { "BitextMining": { "f1": [ { - "Model": "herbert-base-retrieval-v2" + "Model": "voyage-lite-01-instruct" } ] }, "Classification": { "accuracy": [ { - "Model": "herbert-base-retrieval-v2", - "AllegroReviews": 34.11, - "CBD": 68.35, - "MassiveIntentClassification (pl)": 65.53, - "MassiveScenarioClassification (pl)": 68.51, - "PAC": 68.4, - "PolEmo2.0-IN": 64.18, - "PolEmo2.0-OUT": 45.73 + "Model": "voyage-lite-01-instruct", + "AmazonCounterfactualClassification (en)": 71.43, + "AmazonPolarityClassification": 96.41, + 
"AmazonReviewsClassification (en)": 57.06, + "Banking77Classification": 81.64, + "EmotionClassification": 48.29, + "ImdbClassification": 95.49, + "MTOPDomainClassification (en)": 96.3, + "MTOPIntentClassification (en)": 67.93, + "MassiveIntentClassification (en)": 71.29, + "MassiveScenarioClassification (en)": 76.74, + "ToxicConversationsClassification": 75.45, + "TweetSentimentExtractionClassification": 59.44 } ] }, "Clustering": { "v_measure": [ { - "Model": "herbert-base-retrieval-v2", - "8TagsClustering": 28.15 + "Model": "voyage-lite-01-instruct", + "ArxivClusteringP2P": 47.92, + "ArxivClusteringS2S": 42.42, + "BiorxivClusteringP2P": 38.72, + "BiorxivClusteringS2S": 36.6, + "MedrxivClusteringP2P": 34.04, + "MedrxivClusteringS2S": 32.81, + "RedditClustering": 61.56, + "RedditClusteringP2P": 65.35, + "StackExchangeClustering": 70.16, + "StackExchangeClusteringP2P": 38.23, + "TwentyNewsgroupsClustering": 53.56 } ] }, "PairClassification": { "ap": [ { - "Model": "herbert-base-retrieval-v2", - "CDSC-E": 63.31, - "PPC": 84.18, - "PSC": 98.87, - "SICK-E-PL": 54.93 + "Model": "voyage-lite-01-instruct", + "SprintDuplicateQuestions": 96.01, + "TwitterSemEval2015": 76.87, + "TwitterURLCorpus": 86.84 } ] }, "Reranking": { "map": [ { - "Model": "herbert-base-retrieval-v2" + "Model": "voyage-lite-01-instruct", + "AskUbuntuDupQuestions": 65.77, + "MindSmallReranking": 31.69, + "SciDocsRR": 87.03, + "StackOverflowDupQuestions": 54.49 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "herbert-base-retrieval-v2", - "ArguAna-PL": 41.97, - "DBPedia-PL": 24.07, - "FiQA-PL": 24.25, - "HotpotQA-PL": 43.41, - "MSMARCO-PL": 51.56, - "NFCorpus-PL": 25.95, - "NQ-PL": 35.09, - "Quora-PL": 78.86, - "SCIDOCS-PL": 11.0, - "SciFact-PL": 51.92, - "TRECCOVID-PL": 42.64 + "Model": "voyage-lite-01-instruct", + "ArguAna": 58.73, + "CQADupstackRetrieval": 45.11, + "ClimateFEVER": 37.47, + "DBPedia": 43.42, + "FEVER": 89.71, + "FiQA2018": 44.79, + "HotpotQA": 70.46, + "MSMARCO": 39.66, + "NFCorpus": 43.33, + "NQ": 60.65, + "QuoraRetrieval": 87.83, + "SCIDOCS": 23.19, + "SciFact": 73.64, + "TRECCOVID": 78.92, + "Touche2020": 36.83 } ] }, "STS": { "spearman": [ { - "Model": "herbert-base-retrieval-v2", - "CDSC-R": 86.18, - "SICK-R-PL": 64.67, - "STS22 (pl)": 39.73 + "Model": "voyage-lite-01-instruct", + "BIOSSES": 84.85, + "SICK-R": 79.71, + "STS12": 77.09, + "STS13": 88.91, + "STS14": 82.08, + "STS15": 89.21, + "STS16": 84.74, + "STS17 (en-en)": 90.73, + "STS22 (en)": 62.1, + "STSBenchmark": 89.86 } ] }, "Summarization": { "spearman": [ { - "Model": "herbert-base-retrieval-v2" + "Model": "voyage-lite-01-instruct", + "SummEval": 30.97 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "herbert-base-retrieval-v2" + "Model": "voyage-lite-01-instruct" } ] } }, - "voyage-law-2": { + "nomic-embed-text-v1.5-256": { "BitextMining": { "f1": [ { - "Model": "voyage-law-2" + "Model": "nomic-embed-text-v1.5-256" } ] }, "Classification": { "accuracy": [ { - "Model": "voyage-law-2", - "AmazonReviewsClassification (fr)": 41.98, - "MTOPDomainClassification (fr)": 90.12, - "MTOPIntentClassification (fr)": 62.44, - "MasakhaNEWSClassification (fra)": 76.42, - "MassiveIntentClassification (fr)": 66.94, - "MassiveScenarioClassification (fr)": 72.78 + "Model": "nomic-embed-text-v1.5-256", + "AmazonCounterfactualClassification (en)": 72.94, + "AmazonPolarityClassification": 91.35, + "AmazonReviewsClassification (en)": 45.73, + "Banking77Classification": 83.69, + "EmotionClassification": 45.88, + "ImdbClassification": 83.99, + 
"MTOPDomainClassification (en)": 91.68, + "MTOPIntentClassification (en)": 72.47, + "MassiveIntentClassification (en)": 71.76, + "MassiveScenarioClassification (en)": 75.67, + "ToxicConversationsClassification": 70.87, + "TweetSentimentExtractionClassification": 59.2 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-law-2", - "AlloProfClusteringP2P": 62.5, - "AlloProfClusteringS2S": 44.28, - "HALClusteringS2S": 26.36, - "MLSUMClusteringP2P (fr)": 44.03, - "MLSUMClusteringS2S (fr)": 42.95, - "MasakhaNEWSClusteringP2P (fra)": 50.68, - "MasakhaNEWSClusteringS2S (fra)": 38.79 + "Model": "nomic-embed-text-v1.5-256", + "ArxivClusteringP2P": 44.82, + "ArxivClusteringS2S": 35.32, + "BiorxivClusteringP2P": 38.19, + "BiorxivClusteringS2S": 31.83, + "MedrxivClusteringP2P": 34.08, + "MedrxivClusteringS2S": 30.98, + "RedditClustering": 54.92, + "RedditClusteringP2P": 60.23, + "StackExchangeClustering": 61.81, + "StackExchangeClusteringP2P": 34.03, + "TwentyNewsgroupsClustering": 48.56 } ] }, "PairClassification": { "ap": [ { - "Model": "voyage-law-2", - "OpusparcusPC (fr)": 93.06, - "PawsXPairClassification (fr)": 61.54 + "Model": "nomic-embed-text-v1.5-256", + "SprintDuplicateQuestions": 92.31, + "TwitterSemEval2015": 73.61, + "TwitterURLCorpus": 86.34 } ] }, "Reranking": { "map": [ { - "Model": "voyage-law-2", - "AlloprofReranking": 72.92, - "SyntecReranking": 91.2 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "voyage-law-2", - "AILACasedocs": 44.56, - "AILAStatutes": 45.51, - "AlloprofRetrieval": 57.28, - "BSARDRetrieval": 11.83, - "GerDaLIRSmall": 44.91, - "LEMBNarrativeQARetrieval": 55.78, - "LEMBNeedleRetrieval": 80.5, - "LEMBPasskeyRetrieval": 93.75, - "LEMBQMSumRetrieval": 57.26, - "LEMBSummScreenFDRetrieval": 98.72, - "LEMBWikimQARetrieval": 87.08, - "LeCaRDv2": 72.75, - "LegalBenchConsumerContractsQA": 83.27, - "LegalBenchCorporateLobbying": 95.66, - "LegalQuAD": 67.47, - "LegalSummarization": 68.96, - "MintakaRetrieval (fr)": 34.92, - "SyntecRetrieval": 87.33, - "XPQARetrieval (fr)": 73.56 + "Model": "nomic-embed-text-v1.5-256", + "AskUbuntuDupQuestions": 61.34, + "MindSmallReranking": 30.04, + "SciDocsRR": 79.4, + "StackOverflowDupQuestions": 49.95 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "nomic-embed-text-v1.5-256", + "ArguAna": 45.44, + "CQADupstackRetrieval": 37.61, + "ClimateFEVER": 39.63, + "DBPedia": 39.42, + "FEVER": 84.4, + "FiQA2018": 35.0, + "HotpotQA": 67.78, + "MSMARCO": 41.38, + "NFCorpus": 32.54, + "NQ": 57.1, + "QuoraRetrieval": 87.65, + "SCIDOCS": 16.76, + "SciFact": 68.24, + "TRECCOVID": 80.65, + "Touche2020": 28.49 } ] }, "STS": { "spearman": [ { - "Model": "voyage-law-2", - "SICKFr": 74.09, - "STS22 (fr)": 83.75, - "STSBenchmarkMultilingualSTS (fr)": 83.02 + "Model": "nomic-embed-text-v1.5-256", + "BIOSSES": 81.58, + "SICK-R": 79.24, + "STS12": 78.16, + "STS13": 86.01, + "STS14": 81.25, + "STS15": 86.51, + "STS16": 84.24, + "STS17 (en-en)": 86.44, + "STS22 (en)": 65.14, + "STSBenchmark": 84.8 } ] }, "Summarization": { "spearman": [ { - "Model": "voyage-law-2", - "SummEvalFr": 30.34 + "Model": "nomic-embed-text-v1.5-256", + "SummEval": 30.05 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "voyage-law-2" + "Model": "nomic-embed-text-v1.5-256" } ] } }, - "bert-base-multilingual-cased": { + "e5-mistral-7b-instruct-noinstruct": { "BitextMining": { "f1": [ { - "Model": "bert-base-multilingual-cased" + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "Classification": { "accuracy": [ { - "Model": 
"bert-base-multilingual-cased", - "AmazonReviewsClassification (fr)": 29.39, - "MTOPDomainClassification (fr)": 63.61, - "MTOPIntentClassification (fr)": 37.84, - "MasakhaNEWSClassification (fra)": 64.0, - "MassiveIntentClassification (fr)": 37.3, - "MassiveScenarioClassification (fr)": 44.47 + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "Clustering": { "v_measure": [ { - "Model": "bert-base-multilingual-cased", - "AlloProfClusteringP2P": 51.5, - "AlloProfClusteringS2S": 43.06, - "HALClusteringS2S": 20.81, - "MLSUMClusteringP2P": 40.9, - "MLSUMClusteringS2S": 31.8, - "MasakhaNEWSClusteringP2P (fra)": 24.23, - "MasakhaNEWSClusteringS2S (fra)": 24.46 + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "PairClassification": { "ap": [ { - "Model": "bert-base-multilingual-cased", - "OpusparcusPC (fr)": 86.77, - "PawsXPairClassification (fr)": 53.39 + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "Reranking": { "map": [ { - "Model": "bert-base-multilingual-cased", - "AlloprofReranking": 36.23, - "SyntecReranking": 53.25 + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bert-base-multilingual-cased", - "AlloprofRetrieval": 1.63, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 3.55, - "SyntecRetrieval": 18.95, - "XPQARetrieval (fr)": 18.49 + "Model": "e5-mistral-7b-instruct-noinstruct", + "ARCChallenge": 20.48, + "AlphaNLI": 18.88, + "HellaSwag": 32.25, + "PIQA": 32.8, + "Quail": 6.25, + "RARbCode": 79.84, + "RARbMath": 76.19, + "SIQA": 5.08, + "SpartQA": 10.87, + "TempReasonL1": 3.04, + "TempReasonL2Fact": 35.63, + "TempReasonL2Pure": 9.32, + "TempReasonL3Fact": 30.41, + "TempReasonL3Pure": 14.39, + "WinoGrande": 45.18 } ] }, "STS": { "spearman": [ { - "Model": "bert-base-multilingual-cased", - "SICKFr": 58.75, - "STS22 (fr)": 39.05, - "STSBenchmarkMultilingualSTS (fr)": 52.25 + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "Summarization": { "spearman": [ { - "Model": "bert-base-multilingual-cased", - "SummEvalFr": 28.81 + "Model": "e5-mistral-7b-instruct-noinstruct" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "bert-base-multilingual-cased" + "Model": "e5-mistral-7b-instruct-noinstruct" } ] } }, - "LLM2Vec-Meta-Llama-3-unsupervised": { + "e5-base-4k": { "BitextMining": { "f1": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised" + "Model": "e5-base-4k" } ] }, "Classification": { "accuracy": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "AmazonCounterfactualClassification (en)": 75.7, - "AmazonPolarityClassification": 80.68, - "AmazonReviewsClassification (en)": 40.0, - "Banking77Classification": 84.77, - "EmotionClassification": 47.08, - "ImdbClassification": 75.19, - "MTOPDomainClassification (en)": 94.47, - "MTOPIntentClassification (en)": 81.09, - "MassiveIntentClassification (en)": 75.01, - "MassiveScenarioClassification (en)": 79.16, - "ToxicConversationsClassification": 71.85, - "TweetSentimentExtractionClassification": 57.61 + "Model": "e5-base-4k" } ] }, "Clustering": { "v_measure": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "ArxivClusteringP2P": 49.22, - "ArxivClusteringS2S": 41.71, - "BiorxivClusteringP2P": 38.39, - "BiorxivClusteringS2S": 31.31, - "MedrxivClusteringP2P": 31.47, - "MedrxivClusteringS2S": 27.87, - "RedditClustering": 43.67, - "RedditClusteringP2P": 61.67, - "StackExchangeClustering": 68.2, - "StackExchangeClusteringP2P": 36.36, - "TwentyNewsgroupsClustering": 32.01 + "Model": "e5-base-4k" } ] }, "PairClassification": { "ap": [ { - "Model": 
"LLM2Vec-Meta-Llama-3-unsupervised", - "SprintDuplicateQuestions": 88.14, - "TwitterSemEval2015": 66.6, - "TwitterURLCorpus": 79.3 + "Model": "e5-base-4k" } ] }, "Reranking": { "map": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "AskUbuntuDupQuestions": 57.16, - "MindSmallReranking": 30.1, - "SciDocsRR": 76.28, - "StackOverflowDupQuestions": 48.82 + "Model": "e5-base-4k" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "ArguAna": 51.73, - "CQADupstackRetrieval": 32.4, - "ClimateFEVER": 23.58, - "DBPedia": 26.78, - "FEVER": 53.42, - "FiQA2018": 28.56, - "HotpotQA": 52.37, - "MSMARCO": 17.47, - "NFCorpus": 26.28, - "NQ": 37.65, - "QuoraRetrieval": 84.64, - "SCIDOCS": 10.39, - "SciFact": 66.36, - "TRECCOVID": 63.34, - "Touche2020": 12.82 + "Model": "e5-base-4k", + "LEMBNarrativeQARetrieval": 30.35, + "LEMBNeedleRetrieval": 41.5, + "LEMBPasskeyRetrieval": 67.25, + "LEMBQMSumRetrieval": 35.6, + "LEMBSummScreenFDRetrieval": 95.23, + "LEMBWikimQARetrieval": 69.19 } ] }, "STS": { "spearman": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "BIOSSES": 84.67, - "SICK-R": 72.16, - "STS12": 61.6, - "STS13": 79.71, - "STS14": 72.11, - "STS15": 82.18, - "STS16": 79.41, - "STS17 (en-en)": 85.44, - "STS22 (en)": 63.9, - "STSBenchmark": 77.44 + "Model": "e5-base-4k" } ] }, "Summarization": { "spearman": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised", - "SummEval": 31.45 + "Model": "e5-base-4k" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "LLM2Vec-Meta-Llama-3-unsupervised" + "Model": "e5-base-4k" } ] } }, - "text-search-davinci-001": { + "sentence-t5-xl": { "BitextMining": { "f1": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "BUCC (de-en)": 95.04, + "BUCC (fr-en)": 94.96, + "BUCC (ru-en)": 8.33, + "BUCC (zh-en)": 1.3, + "Tatoeba (afr-eng)": 41.84, + "Tatoeba (amh-eng)": 0.03, + "Tatoeba (ang-eng)": 37.87, + "Tatoeba (ara-eng)": 0.61, + "Tatoeba (arq-eng)": 0.74, + "Tatoeba (arz-eng)": 0.42, + "Tatoeba (ast-eng)": 65.41, + "Tatoeba (awa-eng)": 1.46, + "Tatoeba (aze-eng)": 8.79, + "Tatoeba (bel-eng)": 5.76, + "Tatoeba (ben-eng)": 0.01, + "Tatoeba (ber-eng)": 5.92, + "Tatoeba (bos-eng)": 16.12, + "Tatoeba (bre-eng)": 6.12, + "Tatoeba (bul-eng)": 9.06, + "Tatoeba (cat-eng)": 57.4, + "Tatoeba (cbk-eng)": 57.68, + "Tatoeba (ceb-eng)": 12.56, + "Tatoeba (ces-eng)": 9.47, + "Tatoeba (cha-eng)": 27.13, + "Tatoeba (cmn-eng)": 1.82, + "Tatoeba (cor-eng)": 3.87, + "Tatoeba (csb-eng)": 14.41, + "Tatoeba (cym-eng)": 6.69, + "Tatoeba (dan-eng)": 54.87, + "Tatoeba (deu-eng)": 93.72, + "Tatoeba (dsb-eng)": 14.74, + "Tatoeba (dtp-eng)": 5.84, + "Tatoeba (ell-eng)": 0.6, + "Tatoeba (epo-eng)": 30.8, + "Tatoeba (est-eng)": 5.39, + "Tatoeba (eus-eng)": 11.9, + "Tatoeba (fao-eng)": 28.08, + "Tatoeba (fin-eng)": 6.81, + "Tatoeba (fra-eng)": 85.29, + "Tatoeba (fry-eng)": 38.68, + "Tatoeba (gla-eng)": 2.96, + "Tatoeba (gle-eng)": 3.74, + "Tatoeba (glg-eng)": 70.0, + "Tatoeba (gsw-eng)": 30.49, + "Tatoeba (heb-eng)": 0.87, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (hrv-eng)": 17.43, + "Tatoeba (hsb-eng)": 14.69, + "Tatoeba (hun-eng)": 7.28, + "Tatoeba (hye-eng)": 0.77, + "Tatoeba (ido-eng)": 46.65, + "Tatoeba (ile-eng)": 59.43, + "Tatoeba (ina-eng)": 82.71, + "Tatoeba (ind-eng)": 37.26, + "Tatoeba (isl-eng)": 11.21, + "Tatoeba (ita-eng)": 79.77, + "Tatoeba (jav-eng)": 7.81, + "Tatoeba (jpn-eng)": 0.91, + "Tatoeba (kab-eng)": 2.23, + "Tatoeba (kat-eng)": 1.48, + "Tatoeba (kaz-eng)": 1.77, + "Tatoeba (khm-eng)": 0.38, + "Tatoeba (kor-eng)": 
1.96, + "Tatoeba (kur-eng)": 12.11, + "Tatoeba (kzj-eng)": 6.13, + "Tatoeba (lat-eng)": 27.84, + "Tatoeba (lfn-eng)": 45.89, + "Tatoeba (lit-eng)": 5.94, + "Tatoeba (lvs-eng)": 8.11, + "Tatoeba (mal-eng)": 0.59, + "Tatoeba (mar-eng)": 0.03, + "Tatoeba (max-eng)": 21.7, + "Tatoeba (mhr-eng)": 0.68, + "Tatoeba (mkd-eng)": 5.92, + "Tatoeba (mon-eng)": 2.39, + "Tatoeba (nds-eng)": 45.04, + "Tatoeba (nld-eng)": 64.75, + "Tatoeba (nno-eng)": 36.74, + "Tatoeba (nob-eng)": 54.77, + "Tatoeba (nov-eng)": 57.12, + "Tatoeba (oci-eng)": 34.39, + "Tatoeba (orv-eng)": 2.04, + "Tatoeba (pam-eng)": 8.34, + "Tatoeba (pes-eng)": 0.87, + "Tatoeba (pms-eng)": 38.06, + "Tatoeba (pol-eng)": 28.35, + "Tatoeba (por-eng)": 83.61, + "Tatoeba (ron-eng)": 65.27, + "Tatoeba (rus-eng)": 30.42, + "Tatoeba (slk-eng)": 13.19, + "Tatoeba (slv-eng)": 13.49, + "Tatoeba (spa-eng)": 89.18, + "Tatoeba (sqi-eng)": 14.66, + "Tatoeba (srp-eng)": 13.24, + "Tatoeba (swe-eng)": 60.67, + "Tatoeba (swg-eng)": 34.76, + "Tatoeba (swh-eng)": 8.07, + "Tatoeba (tam-eng)": 0.36, + "Tatoeba (tat-eng)": 1.46, + "Tatoeba (tel-eng)": 0.67, + "Tatoeba (tgl-eng)": 25.22, + "Tatoeba (tha-eng)": 1.58, + "Tatoeba (tuk-eng)": 4.99, + "Tatoeba (tur-eng)": 7.72, + "Tatoeba (tzl-eng)": 38.49, + "Tatoeba (uig-eng)": 0.87, + "Tatoeba (ukr-eng)": 9.12, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (uzb-eng)": 5.48, + "Tatoeba (vie-eng)": 8.45, + "Tatoeba (war-eng)": 13.75, + "Tatoeba (wuu-eng)": 1.44, + "Tatoeba (xho-eng)": 9.15, + "Tatoeba (yid-eng)": 0.28, + "Tatoeba (yue-eng)": 0.98, + "Tatoeba (zsm-eng)": 35.71 } ] }, "Classification": { "accuracy": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "AmazonCounterfactualClassification (de)": 67.01, + "AmazonCounterfactualClassification (en)": 76.01, + "AmazonCounterfactualClassification (en-ext)": 77.29, + "AmazonCounterfactualClassification (ja)": 45.61, + "AmazonPolarityClassification": 93.17, + "AmazonReviewsClassification (de)": 44.05, + "AmazonReviewsClassification (en)": 48.18, + "AmazonReviewsClassification (es)": 45.01, + "AmazonReviewsClassification (fr)": 43.52, + "AmazonReviewsClassification (ja)": 22.23, + "AmazonReviewsClassification (zh)": 21.88, + "Banking77Classification": 80.88, + "EmotionClassification": 51.95, + "ImdbClassification": 87.54, + "MTOPDomainClassification (de)": 83.28, + "MTOPDomainClassification (en)": 90.73, + "MTOPDomainClassification (es)": 85.32, + "MTOPDomainClassification (fr)": 85.14, + "MTOPDomainClassification (hi)": 20.85, + "MTOPDomainClassification (th)": 15.62, + "MTOPIntentClassification (de)": 54.65, + "MTOPIntentClassification (en)": 68.15, + "MTOPIntentClassification (es)": 57.38, + "MTOPIntentClassification (fr)": 54.39, + "MTOPIntentClassification (hi)": 3.28, + "MTOPIntentClassification (th)": 5.08, + "MasakhaNEWSClassification (fra)": 80.09, + "MassiveIntentClassification (af)": 40.17, + "MassiveIntentClassification (am)": 2.18, + "MassiveIntentClassification (ar)": 4.18, + "MassiveIntentClassification (az)": 30.02, + "MassiveIntentClassification (bn)": 2.6, + "MassiveIntentClassification (cy)": 29.15, + "MassiveIntentClassification (da)": 47.69, + "MassiveIntentClassification (de)": 57.43, + "MassiveIntentClassification (el)": 9.96, + "MassiveIntentClassification (en)": 72.09, + "MassiveIntentClassification (es)": 57.97, + "MassiveIntentClassification (fa)": 3.6, + "MassiveIntentClassification (fi)": 34.02, + "MassiveIntentClassification (fr)": 60.99, + "MassiveIntentClassification (he)": 2.51, + "MassiveIntentClassification (hi)": 
3.02, + "MassiveIntentClassification (hu)": 31.66, + "MassiveIntentClassification (hy)": 3.32, + "MassiveIntentClassification (id)": 41.53, + "MassiveIntentClassification (is)": 30.25, + "MassiveIntentClassification (it)": 56.57, + "MassiveIntentClassification (ja)": 3.5, + "MassiveIntentClassification (jv)": 31.67, + "MassiveIntentClassification (ka)": 2.79, + "MassiveIntentClassification (km)": 5.43, + "MassiveIntentClassification (kn)": 2.79, + "MassiveIntentClassification (ko)": 2.67, + "MassiveIntentClassification (lv)": 34.25, + "MassiveIntentClassification (ml)": 2.98, + "MassiveIntentClassification (mn)": 20.99, + "MassiveIntentClassification (ms)": 37.43, + "MassiveIntentClassification (my)": 4.02, + "MassiveIntentClassification (nb)": 45.91, + "MassiveIntentClassification (nl)": 50.51, + "MassiveIntentClassification (pl)": 43.95, + "MassiveIntentClassification (pt)": 57.95, + "MassiveIntentClassification (ro)": 49.37, + "MassiveIntentClassification (ru)": 33.46, + "MassiveIntentClassification (sl)": 36.33, + "MassiveIntentClassification (sq)": 37.65, + "MassiveIntentClassification (sv)": 46.35, + "MassiveIntentClassification (sw)": 30.6, + "MassiveIntentClassification (ta)": 1.79, + "MassiveIntentClassification (te)": 2.26, + "MassiveIntentClassification (th)": 4.02, + "MassiveIntentClassification (tl)": 38.92, + "MassiveIntentClassification (tr)": 32.05, + "MassiveIntentClassification (ur)": 2.7, + "MassiveIntentClassification (vi)": 21.47, + "MassiveIntentClassification (zh-CN)": 0.59, + "MassiveIntentClassification (zh-TW)": 3.24, + "MassiveScenarioClassification (af)": 50.81, + "MassiveScenarioClassification (am)": 6.95, + "MassiveScenarioClassification (ar)": 12.32, + "MassiveScenarioClassification (az)": 38.79, + "MassiveScenarioClassification (bn)": 8.0, + "MassiveScenarioClassification (cy)": 33.91, + "MassiveScenarioClassification (da)": 55.79, + "MassiveScenarioClassification (de)": 65.33, + "MassiveScenarioClassification (el)": 16.89, + "MassiveScenarioClassification (en)": 73.26, + "MassiveScenarioClassification (es)": 62.52, + "MassiveScenarioClassification (fa)": 6.08, + "MassiveScenarioClassification (fi)": 43.34, + "MassiveScenarioClassification (fr)": 66.42, + "MassiveScenarioClassification (he)": 7.55, + "MassiveScenarioClassification (hi)": 7.44, + "MassiveScenarioClassification (hu)": 40.85, + "MassiveScenarioClassification (hy)": 9.25, + "MassiveScenarioClassification (id)": 51.92, + "MassiveScenarioClassification (is)": 40.09, + "MassiveScenarioClassification (it)": 62.94, + "MassiveScenarioClassification (ja)": 7.9, + "MassiveScenarioClassification (jv)": 41.33, + "MassiveScenarioClassification (ka)": 7.76, + "MassiveScenarioClassification (km)": 9.19, + "MassiveScenarioClassification (kn)": 8.36, + "MassiveScenarioClassification (ko)": 6.13, + "MassiveScenarioClassification (lv)": 40.7, + "MassiveScenarioClassification (ml)": 6.98, + "MassiveScenarioClassification (mn)": 27.0, + "MassiveScenarioClassification (ms)": 46.9, + "MassiveScenarioClassification (my)": 9.55, + "MassiveScenarioClassification (nb)": 53.43, + "MassiveScenarioClassification (nl)": 59.65, + "MassiveScenarioClassification (pl)": 49.87, + "MassiveScenarioClassification (pt)": 62.18, + "MassiveScenarioClassification (ro)": 58.22, + "MassiveScenarioClassification (ru)": 40.73, + "MassiveScenarioClassification (sl)": 43.66, + "MassiveScenarioClassification (sq)": 49.25, + "MassiveScenarioClassification (sv)": 57.17, + "MassiveScenarioClassification (sw)": 40.55, + 
"MassiveScenarioClassification (ta)": 7.46, + "MassiveScenarioClassification (te)": 7.03, + "MassiveScenarioClassification (th)": 8.52, + "MassiveScenarioClassification (tl)": 51.74, + "MassiveScenarioClassification (tr)": 43.01, + "MassiveScenarioClassification (ur)": 9.61, + "MassiveScenarioClassification (vi)": 28.91, + "MassiveScenarioClassification (zh-CN)": 5.86, + "MassiveScenarioClassification (zh-TW)": 7.14, + "ToxicConversationsClassification": 70.95, + "TweetSentimentExtractionClassification": 61.21 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "AlloProfClusteringP2P": 60.37, + "AlloProfClusteringS2S": 40.76, + "ArxivClusteringP2P": 41.62, + "ArxivClusteringS2S": 31.17, + "BiorxivClusteringP2P": 36.43, + "BiorxivClusteringS2S": 26.47, + "HALClusteringS2S": 20.28, + "MLSUMClusteringP2P": 41.61, + "MLSUMClusteringS2S": 33.6, + "MasakhaNEWSClusteringP2P (fra)": 62.82, + "MasakhaNEWSClusteringS2S (fra)": 31.74, + "MedrxivClusteringP2P": 32.3, + "MedrxivClusteringS2S": 26.93, + "RedditClustering": 57.03, + "RedditClusteringP2P": 62.34, + "StackExchangeClustering": 67.13, + "StackExchangeClusteringP2P": 34.79, + "TwentyNewsgroupsClustering": 49.53 } ] }, "PairClassification": { "ap": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "OpusparcusPC (fr)": 92.48, + "PawsXPairClassification (fr)": 62.52, + "SprintDuplicateQuestions": 91.44, + "TwitterSemEval2015": 80.89, + "TwitterURLCorpus": 85.86 } ] }, "Reranking": { "map": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "AlloprofReranking": 63.3, + "AskUbuntuDupQuestions": 62.86, + "MindSmallReranking": 29.77, + "SciDocsRR": 75.16, + "StackOverflowDupQuestions": 51.05, + "SyntecReranking": 83.07 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-search-davinci-001", - "ArguAna": 43.5, - "ClimateFEVER": 22.3, - "FEVER": 77.5, - "FiQA2018": 51.2, - "HotpotQA": 68.8, - "NFCorpus": 40.7, - "QuoraRetrieval": 63.8, - "SciFact": 75.4, - "TRECCOVID": 64.9, - "Touche2020": 29.1 + "Model": "sentence-t5-xl", + "AlloprofRetrieval": 40.38, + "ArguAna": 39.4, + "BSARDRetrieval": 0.14, + "CQADupstackRetrieval": 40.78, + "ClimateFEVER": 10.61, + "DBPedia": 33.65, + "FEVER": 36.12, + "FiQA2018": 44.71, + "HotpotQA": 37.17, + "MSMARCO": 25.17, + "MintakaRetrieval (fr)": 31.54, + "NFCorpus": 33.18, + "NQ": 46.29, + "QuoraRetrieval": 85.85, + "SCIDOCS": 15.97, + "SciFact": 50.91, + "SyntecRetrieval": 74.24, + "TRECCOVID": 54.77, + "Touche2020": 22.51, + "XPQARetrieval (fr)": 52.14 } ] }, "STS": { "spearman": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "BIOSSES": 73.12, + "SICK-R": 79.98, + "SICKFr": 75.08, + "STS12": 79.02, + "STS13": 88.8, + "STS14": 84.33, + "STS15": 88.89, + "STS16": 85.31, + "STS17 (ar-ar)": 11.13, + "STS17 (en-ar)": -3.93, + "STS17 (en-de)": 79.04, + "STS17 (en-en)": 88.91, + "STS17 (en-tr)": 13.61, + "STS17 (es-en)": 71.72, + "STS17 (es-es)": 83.42, + "STS17 (fr-en)": 71.38, + "STS17 (it-en)": 69.5, + "STS17 (ko-ko)": 9.61, + "STS17 (nl-en)": 66.12, + "STS22 (ar)": 29.6, + "STS22 (de)": 47.72, + "STS22 (de-en)": 49.64, + "STS22 (de-fr)": 62.21, + "STS22 (de-pl)": 34.34, + "STS22 (en)": 64.32, + "STS22 (es)": 58.16, + "STS22 (es-en)": 69.15, + "STS22 (es-it)": 65.26, + "STS22 (fr)": 77.49, + "STS22 (fr-pl)": 50.71, + "STS22 (it)": 66.91, + "STS22 (pl)": 27.04, + "STS22 (pl-en)": 58.85, + "STS22 (ru)": 26.63, + "STS22 (tr)": 43.36, + "STS22 (zh)": 33.55, + "STS22 (zh-en)": 29.0, + 
"STSBenchmark": 83.93, + "STSBenchmarkMultilingualSTS (fr)": 79.42 } ] }, "Summarization": { "spearman": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl", + "SummEval": 29.91, + "SummEvalFr": 31.59 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "text-search-davinci-001" + "Model": "sentence-t5-xl" } ] } }, - "m3e-base": { + "sentence-camembert-base": { "BitextMining": { "f1": [ { - "Model": "m3e-base" + "Model": "sentence-camembert-base" } ] }, "Classification": { "accuracy": [ { - "Model": "m3e-base", - "AmazonReviewsClassification (zh)": 43.02, - "IFlyTek": 44.42, - "JDReview": 85.33, - "MassiveIntentClassification (zh-CN)": 68.4, - "MassiveScenarioClassification (zh-CN)": 74.6, - "MultilingualSentiment": 71.9, - "OnlineShopping": 87.77, - "TNews": 48.28, - "Waimai": 83.99 + "Model": "sentence-camembert-base", + "AmazonReviewsClassification (fr)": 36.03, + "MTOPDomainClassification (fr)": 77.1, + "MTOPIntentClassification (fr)": 43.44, + "MasakhaNEWSClassification (fra)": 70.36, + "MassiveIntentClassification (fr)": 51.59, + "MassiveScenarioClassification (fr)": 61.28 } ] }, "Clustering": { "v_measure": [ { - "Model": "m3e-base", - "CLSClusteringP2P": 39.81, - "CLSClusteringS2S": 37.34, - "ThuNewsClusteringP2P": 59.77, - "ThuNewsClusteringS2S": 53.78 + "Model": "sentence-camembert-base", + "AlloProfClusteringP2P": 59.09, + "AlloProfClusteringS2S": 38.92, + "HALClusteringS2S": 20.22, + "MLSUMClusteringP2P": 35.98, + "MLSUMClusteringS2S": 27.05, + "MasakhaNEWSClusteringP2P (fra)": 36.03, + "MasakhaNEWSClusteringS2S (fra)": 30.77 } ] }, "PairClassification": { "ap": [ { - "Model": "m3e-base", - "Cmnli": 69.98, - "Ocnli": 58.0 + "Model": "sentence-camembert-base", + "OpusparcusPC (fr)": 92.05, + "PawsXPairClassification (fr)": 57.44 } ] }, "Reranking": { "map": [ { - "Model": "m3e-base", - "CMedQAv1": 77.05, - "CMedQAv2": 76.76, - "MMarcoReranking": 17.51, - "T2Reranking": 66.03 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "m3e-base", - "CmedqaRetrieval": 30.33, - "CovidRetrieval": 66.42, - "DuRetrieval": 75.76, - "EcomRetrieval": 50.27, - "MMarcoRetrieval": 65.46, - "MedicalRetrieval": 42.79, - "T2Retrieval": 73.14, - "VideoRetrieval": 51.11 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "m3e-base", - "AFQMC": 35.87, - "ATEC": 41.27, - "BQ": 63.81, - "LCQMC": 74.88, - "PAWSX": 12.19, - "QBQTC": 32.07, - "STS22 (zh)": 66.73, - "STSB": 76.97 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "m3e-base" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "m3e-base" - } - ] - } - }, - "multilingual-e5-large": { - "BitextMining": { - "f1": [ - { - "Model": "multilingual-e5-large", - "BornholmBitextMining (dan-Latn)": 29.61, - "BornholmBitextMining": 44.16, - "Tatoeba (tgl-Latn_eng-Latn)": 92.0, - "Tatoeba (gsw-Latn_eng-Latn)": 51.65, - "Tatoeba (tzl-Latn_eng-Latn)": 53.16, - "Tatoeba (slv-Latn_eng-Latn)": 89.57, - "Tatoeba (jav-Latn_eng-Latn)": 75.46, - "Tatoeba (uig-Arab_eng-Latn)": 72.17, - "Tatoeba (ind-Latn_eng-Latn)": 92.9, - "Tatoeba (rus-Cyrl_eng-Latn)": 92.32, - "Tatoeba (war-Latn_eng-Latn)": 62.02, - "Tatoeba (mar-Deva_eng-Latn)": 88.58, - "Tatoeba (mkd-Cyrl_eng-Latn)": 85.63, - "Tatoeba (jpn-Jpan_eng-Latn)": 95.28, - "Tatoeba (hun-Latn_eng-Latn)": 94.01, - "Tatoeba (slk-Latn_eng-Latn)": 93.13, - "Tatoeba (tha-Thai_eng-Latn)": 95.38, - "Tatoeba (fra-Latn_eng-Latn)": 93.42, - "Tatoeba (ukr-Cyrl_eng-Latn)": 93.32, - "Tatoeba (kat-Geor_eng-Latn)": 84.09, - "Tatoeba (nov-Latn_eng-Latn)": 71.62, - "Tatoeba 
(kor-Hang_eng-Latn)": 90.65, - "Tatoeba (ben-Beng_eng-Latn)": 83.02, - "Tatoeba (cor-Latn_eng-Latn)": 6.28, - "Tatoeba (lfn-Latn_eng-Latn)": 62.91, - "Tatoeba (swh-Latn_eng-Latn)": 71.61, - "Tatoeba (tur-Latn_eng-Latn)": 96.27, - "Tatoeba (cbk-Latn_eng-Latn)": 69.26, - "Tatoeba (kur-Latn_eng-Latn)": 66.83, - "Tatoeba (arq-Arab_eng-Latn)": 41.56, - "Tatoeba (ceb-Latn_eng-Latn)": 55.31, - "Tatoeba (max-Deva_eng-Latn)": 63.41, - "Tatoeba (ang-Latn_eng-Latn)": 40.18, - "Tatoeba (nds-Latn_eng-Latn)": 69.28, - "Tatoeba (epo-Latn_eng-Latn)": 96.01, - "Tatoeba (heb-Hebr_eng-Latn)": 86.61, - "Tatoeba (yue-Hant_eng-Latn)": 88.71, - "Tatoeba (dan-Latn_eng-Latn)": 95.08, - "Tatoeba (swe-Latn_eng-Latn)": 95.3, - "Tatoeba (lvs-Latn_eng-Latn)": 90.06, - "Tatoeba (ast-Latn_eng-Latn)": 81.76, - "Tatoeba (dsb-Latn_eng-Latn)": 48.44, - "Tatoeba (pes-Arab_eng-Latn)": 92.14, - "Tatoeba (dtp-Latn_eng-Latn)": 7.03, - "Tatoeba (tuk-Latn_eng-Latn)": 33.15, - "Tatoeba (isl-Latn_eng-Latn)": 92.09, - "Tatoeba (khm-Khmr_eng-Latn)": 59.96, - "Tatoeba (pam-Latn_eng-Latn)": 9.32, - "Tatoeba (tat-Cyrl_eng-Latn)": 73.51, - "Tatoeba (bos-Latn_eng-Latn)": 92.86, - "Tatoeba (spa-Latn_eng-Latn)": 97.1, - "Tatoeba (kaz-Cyrl_eng-Latn)": 79.67, - "Tatoeba (bel-Cyrl_eng-Latn)": 91.08, - "Tatoeba (zsm-Latn_eng-Latn)": 94.53, - "Tatoeba (cat-Latn_eng-Latn)": 91.03, - "Tatoeba (urd-Arab_eng-Latn)": 89.21, - "Tatoeba (mon-Cyrl_eng-Latn)": 87.53, - "Tatoeba (tam-Taml_eng-Latn)": 88.23, - "Tatoeba (fry-Latn_eng-Latn)": 63.43, - "Tatoeba (nob-Latn_eng-Latn)": 97.2, - "Tatoeba (tel-Telu_eng-Latn)": 91.34, - "Tatoeba (hye-Armn_eng-Latn)": 90.92, - "Tatoeba (awa-Deva_eng-Latn)": 72.27, - "Tatoeba (hrv-Latn_eng-Latn)": 96.15, - "Tatoeba (ile-Latn_eng-Latn)": 79.16, - "Tatoeba (amh-Ethi_eng-Latn)": 80.69, - "Tatoeba (orv-Cyrl_eng-Latn)": 39.87, - "Tatoeba (ara-Arab_eng-Latn)": 85.48, - "Tatoeba (ido-Latn_eng-Latn)": 83.52, - "Tatoeba (hin-Deva_eng-Latn)": 94.48, - "Tatoeba (por-Latn_eng-Latn)": 93.63, - "Tatoeba (ron-Latn_eng-Latn)": 94.87, - "Tatoeba (swg-Latn_eng-Latn)": 55.64, - "Tatoeba (cmn-Hans_eng-Latn)": 95.28, - "Tatoeba (pol-Latn_eng-Latn)": 96.6, - "Tatoeba (bul-Cyrl_eng-Latn)": 92.93, - "Tatoeba (ina-Latn_eng-Latn)": 93.47, - "Tatoeba (bre-Latn_eng-Latn)": 11.1, - "Tatoeba (wuu-Hans_eng-Latn)": 86.37, - "Tatoeba (lit-Latn_eng-Latn)": 88.48, - "Tatoeba (csb-Latn_eng-Latn)": 36.98, - "Tatoeba (lat-Latn_eng-Latn)": 53.37, - "Tatoeba (gle-Latn_eng-Latn)": 71.48, - "Tatoeba (ita-Latn_eng-Latn)": 93.29, - "Tatoeba (srp-Cyrl_eng-Latn)": 93.1, - "Tatoeba (arz-Arab_eng-Latn)": 74.73, - "Tatoeba (cym-Latn_eng-Latn)": 76.21, - "Tatoeba (ber-Tfng_eng-Latn)": 38.9, - "Tatoeba (xho-Latn_eng-Latn)": 80.87, - "Tatoeba (uzb-Latn_eng-Latn)": 72.35, - "Tatoeba (pms-Latn_eng-Latn)": 59.85, - "Tatoeba (est-Latn_eng-Latn)": 85.03, - "Tatoeba (deu-Latn_eng-Latn)": 99.07, - "Tatoeba (yid-Hebr_eng-Latn)": 76.33, - "Tatoeba (ell-Grek_eng-Latn)": 93.88, - "Tatoeba (afr-Latn_eng-Latn)": 90.22, - "Tatoeba (fao-Latn_eng-Latn)": 72.62, - "Tatoeba (nld-Latn_eng-Latn)": 96.63, - "Tatoeba (hsb-Latn_eng-Latn)": 58.9, - "Tatoeba (aze-Latn_eng-Latn)": 87.61, - "Tatoeba (kzj-Latn_eng-Latn)": 7.91, - "Tatoeba (kab-Latn_eng-Latn)": 36.54, - "Tatoeba (mal-Mlym_eng-Latn)": 97.7, - "Tatoeba (mhr-Cyrl_eng-Latn)": 6.79, - "Tatoeba (ces-Latn_eng-Latn)": 94.89, - "Tatoeba (gla-Latn_eng-Latn)": 59.0, - "Tatoeba (cha-Latn_eng-Latn)": 27.16, - "Tatoeba (glg-Latn_eng-Latn)": 93.34, - "Tatoeba (vie-Latn_eng-Latn)": 97.0, - "Tatoeba (oci-Latn_eng-Latn)": 54.91, - "Tatoeba 
(nno-Latn_eng-Latn)": 91.4, - "Tatoeba (fin-Latn_eng-Latn)": 95.44, - "Tatoeba (eus-Latn_eng-Latn)": 77.82, - "Tatoeba (sqi-Latn_eng-Latn)": 94.7 + "Model": "sentence-camembert-base", + "AlloprofReranking": 48.68, + "SyntecReranking": 79.75 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "sentence-camembert-base", + "AlloprofRetrieval": 21.94, + "BSARDRetrieval": 0.0, + "MintakaRetrieval (fr)": 13.36, + "SyntecRetrieval": 68.62, + "XPQARetrieval (fr)": 57.92 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "sentence-camembert-base", + "SICKFr": 74.18, + "STS22 (fr)": 77.54, + "STSBenchmarkMultilingualSTS (fr)": 81.64 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "sentence-camembert-base", + "SummEvalFr": 28.77 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "sentence-camembert-base" + } + ] + } + }, + "Cohere-embed-multilingual-v3.0": { + "BitextMining": { + "f1": [ + { + "Model": "Cohere-embed-multilingual-v3.0" } ] }, "Classification": { "accuracy": [ { - "Model": "multilingual-e5-large", - "AllegroReviews (pol-Latn)": 41.04, - "AllegroReviews": 41.14, - "AmazonCounterfactualClassification (en-ext)": 78.73, - "AmazonCounterfactualClassification (en)": 78.67, - "AmazonCounterfactualClassification (deu-Latn)": 68.66, - "AmazonCounterfactualClassification (jpn-Jpan)": 78.8, - "AmazonPolarityClassification": 93.26, - "AmazonReviewsClassification (en)": 49.2, - "AmazonReviewsClassification (deu-Latn)": 46.5, - "AmazonReviewsClassification (spa-Latn)": 44.35, - "AmazonReviewsClassification (fra-Latn)": 42.55, - "AmazonReviewsClassification (jpn-Jpan)": 41.71, - "AmazonReviewsClassification (cmn-Hans)": 38.87, - "AmazonReviewsClassification (fr)": 41.91, - "AngryTweetsClassification (dan-Latn)": 57.69, - "AngryTweetsClassification": 54.95, - "Banking77Classification": 75.88, - "CBD (pol-Latn)": 69.84, - "CBD": 69.9, - "DKHateClassification": 66.02, - "DanishPoliticalCommentsClassification (dan-Latn)": 39.43, - "DanishPoliticalCommentsClassification": 38.27, - "EmotionClassification": 47.58, - "GeoreviewClassification (rus-Cyrl)": 49.69, - "HeadlineClassification (rus-Cyrl)": 77.19, - "IFlyTek (cmn-Hans)": 41.86, - "IFlyTek": 45.47, - "ImdbClassification": 90.23, - "InappropriatenessClassification (rus-Cyrl)": 61.6, - "JDReview (cmn-Hans)": 80.54, - "JDReview": 80.99, - "KinopoiskClassification (rus-Cyrl)": 56.59, - "LccSentimentClassification (dan-Latn)": 61.53, - "LccSentimentClassification": 59.6, - "MTOPDomainClassification (en)": 91.81, - "MTOPDomainClassification (deu-Latn)": 90.44, - "MTOPDomainClassification (spa-Latn)": 88.34, - "MTOPDomainClassification (fra-Latn)": 86.23, - "MTOPDomainClassification (hin-Deva)": 86.84, - "MTOPDomainClassification (tha-Thai)": 86.88, - "MTOPDomainClassification (fr)": 86.41, - "MTOPIntentClassification (en)": 64.29, - "MTOPIntentClassification (deu-Latn)": 65.97, - "MTOPIntentClassification (spa-Latn)": 61.9, - "MTOPIntentClassification (fra-Latn)": 56.25, - "MTOPIntentClassification (hin-Deva)": 59.17, - "MTOPIntentClassification (tha-Thai)": 62.59, - "MTOPIntentClassification (fr)": 59.43, - "MasakhaNEWSClassification (amh-Ethi)": 83.7, - "MasakhaNEWSClassification (eng)": 78.26, - "MasakhaNEWSClassification (fra-Latn)": 76.11, - "MasakhaNEWSClassification (hau-Latn)": 76.17, - "MasakhaNEWSClassification (ibo-Latn)": 70.05, - "MasakhaNEWSClassification (lin-Latn)": 75.89, - "MasakhaNEWSClassification (lug-Latn)": 73.63, - "MasakhaNEWSClassification (orm-Ethi)": 80.31, - 
"MasakhaNEWSClassification (pcm-Latn)": 89.15, - "MasakhaNEWSClassification (run-Latn)": 76.55, - "MasakhaNEWSClassification (sna-Latn)": 86.99, - "MasakhaNEWSClassification (som-Latn)": 64.63, - "MasakhaNEWSClassification (swa-Latn)": 73.42, - "MasakhaNEWSClassification (tir-Ethi)": 72.06, - "MasakhaNEWSClassification (xho-Latn)": 82.56, - "MasakhaNEWSClassification (yor-Latn)": 81.09, - "MasakhaNEWSClassification (fra)": 79.38, - "MassiveIntentClassification (kor-Kore)": 63.92, - "MassiveIntentClassification (lav-Latn)": 58.31, - "MassiveIntentClassification (isl-Latn)": 53.3, - "MassiveIntentClassification (tel-Telu)": 53.96, - "MassiveIntentClassification (mya-Mymr)": 49.73, - "MassiveIntentClassification (nob-Latn)": 64.54, - "MassiveIntentClassification (en)": 68.51, - "MassiveIntentClassification (spa-Latn)": 64.01, - "MassiveIntentClassification (swe-Latn)": 66.52, - "MassiveIntentClassification (cmo-Hant)": 58.78, - "MassiveIntentClassification (pol-Latn)": 65.09, - "MassiveIntentClassification (rus-Cyrl)": 65.76, - "MassiveIntentClassification (aze-Latn)": 54.68, - "MassiveIntentClassification (fin-Latn)": 64.28, - "MassiveIntentClassification (cmo-Hans)": 66.23, - "MassiveIntentClassification (urd-Arab)": 54.6, - "MassiveIntentClassification (tam-Taml)": 53.41, - "MassiveIntentClassification (hin-Deva)": 60.93, - "MassiveIntentClassification (deu-Latn)": 63.82, - "MassiveIntentClassification (ell-Grek)": 64.34, - "MassiveIntentClassification (hye-Armn)": 50.89, - "MassiveIntentClassification (por-Latn)": 65.6, - "MassiveIntentClassification (nld-Latn)": 65.0, - "MassiveIntentClassification (fas-Arab)": 63.74, - "MassiveIntentClassification (ron-Latn)": 59.76, - "MassiveIntentClassification (slv-Latn)": 59.38, - "MassiveIntentClassification (heb-Hebr)": 62.44, - "MassiveIntentClassification (vie-Latn)": 63.39, - "MassiveIntentClassification (sqi-Latn)": 57.3, - "MassiveIntentClassification (khm-Khmr)": 34.88, - "MassiveIntentClassification (ben-Beng)": 55.6, - "MassiveIntentClassification (tgl-Latn)": 54.77, - "MassiveIntentClassification (jpn-Jpan)": 67.11, - "MassiveIntentClassification (kat-Geor)": 41.45, - "MassiveIntentClassification (afr-Latn)": 53.69, - "MassiveIntentClassification (cym-Latn)": 44.22, - "MassiveIntentClassification (amh-Ethi)": 45.48, - "MassiveIntentClassification (ita-Latn)": 63.89, - "MassiveIntentClassification (mal-Mlym)": 57.58, - "MassiveIntentClassification (tha-Thai)": 62.75, - "MassiveIntentClassification (ind-Latn)": 63.51, - "MassiveIntentClassification (jav-Latn)": 48.96, - "MassiveIntentClassification (dan-Latn)": 63.7, - "MassiveIntentClassification (ara-Arab)": 54.1, - "MassiveIntentClassification (kan-Knda)": 53.45, - "MassiveIntentClassification (hun-Latn)": 64.0, - "MassiveIntentClassification (tur-Latn)": 64.61, - "MassiveIntentClassification (msa-Latn)": 58.49, - "MassiveIntentClassification (mon-Cyrl)": 49.6, - "MassiveIntentClassification (swa-Latn)": 47.69, - "MassiveIntentClassification (fra-Latn)": 63.37, - "MassiveIntentClassification (da)": 60.16, - "MassiveIntentClassification (nb)": 59.83, - "MassiveIntentClassification (sv)": 61.78, - "MassiveIntentClassification (pl)": 65.07, - "MassiveScenarioClassification (heb-Hebr)": 67.72, - "MassiveScenarioClassification (vie-Latn)": 68.91, - "MassiveScenarioClassification (cmo-Hant)": 64.35, - "MassiveScenarioClassification (urd-Arab)": 60.89, - "MassiveScenarioClassification (isl-Latn)": 60.74, - "MassiveScenarioClassification (ell-Grek)": 69.74, - "MassiveScenarioClassification 
(mon-Cyrl)": 55.37, - "MassiveScenarioClassification (swa-Latn)": 56.27, - "MassiveScenarioClassification (tam-Taml)": 58.76, - "MassiveScenarioClassification (hye-Armn)": 55.76, - "MassiveScenarioClassification (amh-Ethi)": 52.69, - "MassiveScenarioClassification (ben-Beng)": 61.85, - "MassiveScenarioClassification (tel-Telu)": 59.49, - "MassiveScenarioClassification (dan-Latn)": 71.18, - "MassiveScenarioClassification (slv-Latn)": 65.33, - "MassiveScenarioClassification (en)": 73.04, - "MassiveScenarioClassification (rus-Cyrl)": 70.85, - "MassiveScenarioClassification (mal-Mlym)": 63.17, - "MassiveScenarioClassification (sqi-Latn)": 63.79, - "MassiveScenarioClassification (ita-Latn)": 69.45, - "MassiveScenarioClassification (kor-Kore)": 70.54, - "MassiveScenarioClassification (cmo-Hans)": 72.25, - "MassiveScenarioClassification (cym-Latn)": 51.25, - "MassiveScenarioClassification (pol-Latn)": 69.83, - "MassiveScenarioClassification (ind-Latn)": 69.43, - "MassiveScenarioClassification (tur-Latn)": 68.12, - "MassiveScenarioClassification (tgl-Latn)": 60.71, - "MassiveScenarioClassification (hin-Deva)": 66.85, - "MassiveScenarioClassification (spa-Latn)": 69.07, - "MassiveScenarioClassification (lav-Latn)": 64.28, - "MassiveScenarioClassification (mya-Mymr)": 54.03, - "MassiveScenarioClassification (ara-Arab)": 61.0, - "MassiveScenarioClassification (kan-Knda)": 59.36, - "MassiveScenarioClassification (jav-Latn)": 56.24, - "MassiveScenarioClassification (por-Latn)": 68.33, - "MassiveScenarioClassification (tha-Thai)": 69.06, - "MassiveScenarioClassification (aze-Latn)": 58.49, - "MassiveScenarioClassification (fra-Latn)": 68.74, - "MassiveScenarioClassification (ron-Latn)": 66.06, - "MassiveScenarioClassification (nld-Latn)": 71.11, - "MassiveScenarioClassification (fas-Arab)": 67.55, - "MassiveScenarioClassification (deu-Latn)": 71.25, - "MassiveScenarioClassification (nob-Latn)": 70.44, - "MassiveScenarioClassification (msa-Latn)": 63.55, - "MassiveScenarioClassification (afr-Latn)": 62.35, - "MassiveScenarioClassification (hun-Latn)": 70.53, - "MassiveScenarioClassification (swe-Latn)": 72.77, - "MassiveScenarioClassification (kat-Geor)": 47.82, - "MassiveScenarioClassification (jpn-Jpan)": 73.16, - "MassiveScenarioClassification (khm-Khmr)": 41.14, - "MassiveScenarioClassification (fin-Latn)": 68.62, - "MassiveScenarioClassification (da)": 67.46, - "MassiveScenarioClassification (nb)": 66.18, - "MassiveScenarioClassification (sv)": 69.15, - "MassiveScenarioClassification (pl)": 69.82, - "MultilingualSentiment (cmn-Hans)": 70.81, - "MultilingualSentiment": 68.58, - "NoRecClassification (nob-Latn)": 58.43, - "NoRecClassification": 62.76, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 80.15, - "NordicLangClassification": 82.29, - "NorwegianParliament": 60.36, - "OnlineShopping (cmn-Hans)": 90.45, - "OnlineShopping": 90.81, - "PAC (pol-Latn)": 70.33, - "PAC": 70.37, - "PolEmo2.0-IN (pol-Latn)": 77.06, - "PolEmo2.0-IN": 77.06, - "PolEmo2.0-OUT (pol-Latn)": 53.48, - "PolEmo2.0-OUT": 53.38, - "RuReviewsClassification (rus-Cyrl)": 65.28, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 58.2, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.91, - "ScalaDaClassification": 50.77, - "ScalaNbClassification": 50.44, - "TNews (cmn-Hans)": 48.8, - "TNews": 48.38, - "ToxicConversationsClassification": 66.01, - "TweetSentimentExtractionClassification": 62.8, - "Waimai (cmn-Hans)": 86.3, - "Waimai": 85.02 + "Model": "Cohere-embed-multilingual-v3.0", + 
"AmazonReviewsClassification (fr)": 41.89, + "MTOPDomainClassification (fr)": 86.23, + "MTOPIntentClassification (fr)": 61.07, + "MasakhaNEWSClassification (fra)": 83.06, + "MassiveIntentClassification (fr)": 62.94, + "MassiveScenarioClassification (fr)": 67.29 } ] }, "Clustering": { "v_measure": [ { - "Model": "multilingual-e5-large", - "8TagsClustering": 33.88, - "AlloProfClusteringP2P": 62.99, - "AlloProfClusteringS2S": 32.26, - "BiorxivClusteringP2P": 35.5, - "BiorxivClusteringS2S": 33.3, - "CLSClusteringP2P": 40.68, - "CLSClusteringS2S": 38.59, - "GeoreviewClusteringP2P (rus-Cyrl)": 60.51, - "HALClusteringS2S": 22.44, - "MLSUMClusteringP2P (rus-Cyrl)": 42.79, - "MLSUMClusteringP2P": 44.04, - "MLSUMClusteringS2S (rus-Cyrl)": 44.32, - "MLSUMClusteringS2S": 37.65, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 67.16, - "MasakhaNEWSClusteringP2P (eng)": 61.1, - "MasakhaNEWSClusteringP2P (fra-Latn)": 41.66, - "MasakhaNEWSClusteringP2P (hau-Latn)": 60.7, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 48.41, - "MasakhaNEWSClusteringP2P (lin-Latn)": 57.69, - "MasakhaNEWSClusteringP2P (lug-Latn)": 71.95, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 60.14, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 80.84, - "MasakhaNEWSClusteringP2P (run-Latn)": 59.91, - "MasakhaNEWSClusteringP2P (sna-Latn)": 53.3, - "MasakhaNEWSClusteringP2P (som-Latn)": 34.38, - "MasakhaNEWSClusteringP2P (swa-Latn)": 33.25, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 54.21, - "MasakhaNEWSClusteringP2P (xho-Latn)": 41.12, - "MasakhaNEWSClusteringP2P (yor-Latn)": 36.22, - "MasakhaNEWSClusteringP2P (fra)": 40.94, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 47.24, - "MasakhaNEWSClusteringS2S (eng)": 53.93, - "MasakhaNEWSClusteringS2S (fra-Latn)": 39.84, - "MasakhaNEWSClusteringS2S (hau-Latn)": 19.24, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 28.88, - "MasakhaNEWSClusteringS2S (lin-Latn)": 42.22, - "MasakhaNEWSClusteringS2S (lug-Latn)": 43.63, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 26.29, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 59.77, - "MasakhaNEWSClusteringS2S (run-Latn)": 51.46, - "MasakhaNEWSClusteringS2S (sna-Latn)": 48.14, - "MasakhaNEWSClusteringS2S (som-Latn)": 25.14, - "MasakhaNEWSClusteringS2S (swa-Latn)": 7.28, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 50.51, - "MasakhaNEWSClusteringS2S (xho-Latn)": 30.98, - "MasakhaNEWSClusteringS2S (yor-Latn)": 34.09, - "MasakhaNEWSClusteringS2S (fra)": 30.56, - "MedrxivClusteringP2P": 31.7, - "MedrxivClusteringS2S": 29.76, - "RedditClustering": 46.91, - "RedditClusteringP2P": 63.0, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 52.03, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 45.11, - "StackExchangeClustering": 58.37, - "StackExchangeClusteringP2P": 32.9, - "ThuNewsClusteringP2P": 58.05, - "ThuNewsClusteringS2S": 55.59, - "TwentyNewsgroupsClustering": 39.4 + "Model": "Cohere-embed-multilingual-v3.0", + "AlloProfClusteringP2P": 63.53, + "AlloProfClusteringS2S": 36.18, + "HALClusteringS2S": 19.9, + "MLSUMClusteringP2P": 45.08, + "MLSUMClusteringS2S": 34.75, + "MasakhaNEWSClusteringP2P (fra)": 53.18, + "MasakhaNEWSClusteringS2S (fra)": 32.31 } ] }, "PairClassification": { "ap": [ { - "Model": "multilingual-e5-large", - "CDSC-E (pol-Latn)": 74.47, - "CDSC-E": 74.47, - "Cmnli": 78.18, - "Ocnli": 61.6, - "OpusparcusPC (deu-Latn)": 97.27, - "OpusparcusPC (en)": 98.74, - "OpusparcusPC (fin-Latn)": 94.26, - "OpusparcusPC (fra-Latn)": 93.68, - "OpusparcusPC (rus-Cyrl)": 89.64, - "OpusparcusPC (swe-Latn)": 94.98, - "OpusparcusPC (fr)": 93.89, - "PPC": 92.18, - "PSC (pol-Latn)": 99.4, - "PSC": 99.39, - 
"PawsXPairClassification (deu-Latn)": 56.81, - "PawsXPairClassification (en)": 62.97, - "PawsXPairClassification (spa-Latn)": 56.85, - "PawsXPairClassification (fra-Latn)": 58.68, - "PawsXPairClassification (jpn-Hira)": 50.7, - "PawsXPairClassification (kor-Hang)": 52.08, - "PawsXPairClassification (cmn-Hans)": 56.82, - "PawsXPairClassification (fr)": 58.5, - "SICK-E-PL (pol-Latn)": 75.95, - "SICK-E-PL": 75.96, - "SprintDuplicateQuestions": 93.14, - "TERRa (rus-Cyrl)": 58.4, - "TwitterSemEval2015": 75.28, - "TwitterURLCorpus": 85.83 + "Model": "Cohere-embed-multilingual-v3.0", + "OpusparcusPC (fr)": 94.08, + "PawsXPairClassification (fr)": 61.26 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "AlloprofReranking": 51.01, + "SyntecReranking": 85.72 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "AlloprofRetrieval": 38.36, + "BSARDRetrieval": 0.14, + "MintakaRetrieval (fr)": 25.44, + "SyntecRetrieval": 79.27, + "XPQARetrieval (fr)": 58.87 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "SICKFr": 79.23, + "STS22 (fr)": 82.76, + "STSBenchmarkMultilingualSTS (fr)": 81.84 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "SummEvalFr": 31.26 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "Cohere-embed-multilingual-v3.0" + } + ] + } + }, + "text2vec-large-chinese": { + "BitextMining": { + "f1": [ + { + "Model": "text2vec-large-chinese" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text2vec-large-chinese", + "AmazonReviewsClassification (zh)": 33.77, + "IFlyTek": 41.54, + "JDReview": 81.56, + "MassiveIntentClassification (zh-CN)": 63.23, + "MassiveScenarioClassification (zh-CN)": 68.45, + "MultilingualSentiment": 58.97, + "OnlineShopping": 83.51, + "TNews": 38.92, + "Waimai": 76.01 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text2vec-large-chinese", + "CLSClusteringP2P": 30.13, + "CLSClusteringS2S": 28.77, + "ThuNewsClusteringP2P": 35.05, + "ThuNewsClusteringS2S": 26.14 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text2vec-large-chinese", + "Cmnli": 77.67, + "Ocnli": 64.04 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text2vec-large-chinese", + "CMedQAv1": 58.92, + "CMedQAv2": 60.41, + "MMarcoReranking": 12.48, + "T2Reranking": 64.82 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text2vec-large-chinese", + "CmedqaRetrieval": 15.53, + "CovidRetrieval": 60.48, + "DuRetrieval": 51.87, + "EcomRetrieval": 37.58, + "MMarcoRetrieval": 45.96, + "MedicalRetrieval": 30.93, + "T2Retrieval": 50.52, + "VideoRetrieval": 42.65 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text2vec-large-chinese", + "AFQMC": 24.51, + "ATEC": 32.45, + "BQ": 44.22, + "LCQMC": 69.16, + "PAWSX": 14.55, + "QBQTC": 29.51, + "STS22 (zh)": 65.94, + "STSB": 79.45 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text2vec-large-chinese" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text2vec-large-chinese" + } + ] + } + }, + "dragon-plus": { + "BitextMining": { + "f1": [ + { + "Model": "dragon-plus" } ] }, - "Reranking": { - "map": [ + "Classification": { + "accuracy": [ { - "Model": "multilingual-e5-large", - "AlloprofReranking (fra-Latn)": 69.44, - "AlloprofReranking": 57.37, - "AskUbuntuDupQuestions": 59.24, - "CMedQAv1": 68.25, - "CMedQAv2": 68.56, - "MMarcoReranking (cmn-Hans)": 29.12, - 
"MMarcoReranking": 21.34, - "MindSmallReranking": 30.24, - "RuBQReranking (rus-Cyrl)": 75.58, - "SciDocsRR": 84.22, - "StackOverflowDupQuestions": 50.14, - "SyntecReranking (fra-Latn)": 85.45, - "SyntecReranking": 86.9, - "T2Reranking (cmn-Hans)": 66.32, - "T2Reranking": 65.83 + "Model": "dragon-plus" } ] }, - "Retrieval": { - "ndcg_at_10": [ + "Clustering": { + "v_measure": [ { - "Model": "multilingual-e5-large", - "AILACasedocs": 26.43, - "AILAStatutes": 20.84, - "ARCChallenge": 10.83, - "AlloprofRetrieval (fra-Latn)": 39.34, - "AlloprofRetrieval": 38.15, - "AlphaNLI": 13.59, - "ArguAna": 54.36, - "ArguAna-PL (pol-Latn)": 52.99, - "ArguAna-PL": 53.02, - "BSARDRetrieval (fra-Latn)": 21.28, - "BSARDRetrieval": 0.27, - "CmedqaRetrieval (cmn-Hans)": 28.66, - "CmedqaRetrieval": 28.67, - "CovidRetrieval (cmn-Hans)": 75.61, - "CovidRetrieval": 75.51, - "DBPedia-PL": 35.82, - "DuRetrieval (cmn-Hans)": 85.3, - "DuRetrieval": 85.32, - "EcomRetrieval (cmn-Hans)": 54.67, - "EcomRetrieval": 54.75, - "FiQA-PL (pol-Latn)": 32.97, - "FiQA-PL": 33.0, - "FiQA2018": 43.81, - "GerDaLIRSmall (deu-Latn)": 15.72, - "HellaSwag": 27.35, - "HotpotQA-PL": 67.41, - "LEMBNarrativeQARetrieval": 24.22, - "LEMBNeedleRetrieval": 28.0, - "LEMBPasskeyRetrieval": 38.25, - "LEMBQMSumRetrieval": 24.26, - "LEMBSummScreenFDRetrieval": 71.12, - "LEMBWikimQARetrieval": 56.8, - "LeCaRDv2 (zho-Hans)": 55.83, - "LegalBenchConsumerContractsQA": 73.3, - "LegalBenchCorporateLobbying": 89.72, - "LegalQuAD (deu-Latn)": 43.17, - "LegalSummarization": 62.1, - "MMarcoRetrieval (cmn-Hans)": 79.2, - "MMarcoRetrieval": 79.2, - "MSMARCO-PL": 33.38, - "MedicalRetrieval (cmn-Hans)": 51.44, - "MedicalRetrieval": 51.44, - "MintakaRetrieval (ara-Arab)": 26.5, - "MintakaRetrieval (deu-Latn)": 32.77, - "MintakaRetrieval (spa-Latn)": 34.23, - "MintakaRetrieval (fra-Latn)": 34.24, - "MintakaRetrieval (hin-Deva)": 27.45, - "MintakaRetrieval (ita-Latn)": 33.84, - "MintakaRetrieval (jpn-Hira)": 26.45, - "MintakaRetrieval (por-Latn)": 35.9, - "MintakaRetrieval (fr)": 25.2, - "NFCorpus": 33.95, - "NFCorpus-PL (pol-Latn)": 30.21, - "NFCorpus-PL": 30.24, - "NQ-PL": 52.79, - "PIQA": 28.82, - "Quail": 4.85, - "Quora-PL": 83.65, - "RARbCode": 58.92, - "RARbMath": 67.32, - "RiaNewsRetrieval (rus-Cyrl)": 80.67, - "RuBQRetrieval (rus-Cyrl)": 74.11, - "SCIDOCS": 17.45, - "SCIDOCS-PL (pol-Latn)": 13.82, - "SCIDOCS-PL": 13.81, - "SIQA": 5.36, - "SciFact": 70.42, - "SciFact-PL (pol-Latn)": 65.66, - "SciFact-PL": 65.66, - "SpartQA": 5.64, - "SyntecRetrieval (fra-Latn)": 82.39, - "SyntecRetrieval": 81.07, - "T2Retrieval (cmn-Hans)": 76.07, - "T2Retrieval": 76.11, - "TRECCOVID": 71.21, - "TRECCOVID-PL (pol-Latn)": 69.9, - "TRECCOVID-PL": 70.03, - "TempReasonL1": 1.14, - "TempReasonL2Fact": 42.97, - "TempReasonL2Pure": 2.05, - "TempReasonL3Fact": 38.22, - "TempReasonL3Pure": 8.31, - "Touche2020": 23.13, - "VideoRetrieval (cmn-Hans)": 58.28, - "VideoRetrieval": 58.25, - "WinoGrande": 54.99, - "XPQARetrieval (ara-Arab_ara-Arab)": 43.69, - "XPQARetrieval (eng-Latn_ara-Arab)": 30.86, - "XPQARetrieval (ara-Arab_eng-Latn)": 39.11, - "XPQARetrieval (deu-Latn_deu-Latn)": 76.83, - "XPQARetrieval (eng-Latn_deu-Latn)": 42.87, - "XPQARetrieval (deu-Latn_eng-Latn)": 68.25, - "XPQARetrieval (spa-Latn_spa-Latn)": 61.77, - "XPQARetrieval (eng-Latn_spa-Latn)": 37.55, - "XPQARetrieval (spa-Latn_eng-Latn)": 52.86, - "XPQARetrieval (fra-Latn_fra-Latn)": 61.38, - "XPQARetrieval (eng-Latn_fra-Latn)": 39.12, - "XPQARetrieval (fra-Latn_eng-Latn)": 57.93, - "XPQARetrieval (hin-Deva_hin-Deva)": 
71.09, - "XPQARetrieval (eng-Latn_hin-Deva)": 32.39, - "XPQARetrieval (hin-Deva_eng-Latn)": 68.31, - "XPQARetrieval (ita-Latn_ita-Latn)": 74.32, - "XPQARetrieval (eng-Latn_ita-Latn)": 37.95, - "XPQARetrieval (ita-Latn_eng-Latn)": 64.54, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 74.11, - "XPQARetrieval (eng-Latn_jpn-Hira)": 38.31, - "XPQARetrieval (jpn-Hira_eng-Latn)": 65.42, - "XPQARetrieval (kor-Hang_kor-Hang)": 35.72, - "XPQARetrieval (eng-Latn_kor-Hang)": 31.09, - "XPQARetrieval (kor-Hang_eng-Latn)": 34.06, - "XPQARetrieval (pol-Latn_pol-Latn)": 51.01, - "XPQARetrieval (eng-Latn_pol-Latn)": 30.49, - "XPQARetrieval (pol-Latn_eng-Latn)": 44.66, - "XPQARetrieval (por-Latn_por-Latn)": 41.1, - "XPQARetrieval (eng-Latn_por-Latn)": 22.03, - "XPQARetrieval (por-Latn_eng-Latn)": 35.15, - "XPQARetrieval (tam-Taml_tam-Taml)": 39.51, - "XPQARetrieval (eng-Latn_tam-Taml)": 17.33, - "XPQARetrieval (tam-Taml_eng-Latn)": 33.67, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 66.27, - "XPQARetrieval (eng-Latn_cmn-Hans)": 26.24, - "XPQARetrieval (cmn-Hans_eng-Latn)": 55.15, - "XPQARetrieval (fr)": 66.15 + "Model": "dragon-plus" } ] }, - "STS": { - "spearman": [ + "PairClassification": { + "ap": [ { - "Model": "multilingual-e5-large", - "AFQMC (cmn-Hans)": 33.01, - "AFQMC": 33.02, - "ATEC (cmn-Hans)": 39.8, - "ATEC": 39.81, - "BIOSSES": 82.49, - "BQ (cmn-Hans)": 46.44, - "BQ": 46.44, - "CDSC-R (pol-Latn)": 91.0, - "CDSC-R": 91.0, - "LCQMC (cmn-Hans)": 75.95, - "LCQMC": 75.95, - "PAWSX (cmn-Hans)": 14.63, - "PAWSX": 14.63, - "QBQTC": 29.77, - "RUParaPhraserSTS (rus-Cyrl)": 71.82, - "RuSTSBenchmarkSTS (rus-Cyrl)": 83.15, - "SICK-R": 80.23, - "SICK-R-PL (pol-Latn)": 75.08, - "SICK-R-PL": 75.08, - "SICKFr (fra-Latn)": 78.81, - "SICKFr": 78.78, - "STS12": 80.02, - "STS13": 81.55, - "STS14": 77.72, - "STS15": 89.31, - "STS16": 85.79, - "STS17 (en-en)": 88.12, - "STS17 (spa-Latn)": 86.71, - "STS17 (spa-Latn_eng-Latn)": 80.74, - "STS17 (eng-Latn_ara-Arab)": 75.03, - "STS17 (fra-Latn_eng-Latn)": 85.62, - "STS17 (kor-Hang)": 82.27, - "STS17 (ita-Latn_eng-Latn)": 84.52, - "STS17 (ara-Arab)": 77.83, - "STS17 (eng-Latn_tur-Latn)": 71.22, - "STS17 (eng-Latn_deu-Latn)": 86.15, - "STS17 (nld-Latn_eng-Latn)": 85.29, - "STS22 (spa-Latn)": 64.6, - "STS22 (spa-Latn_eng-Latn)": 72.51, - "STS22 (deu-Latn_eng-Latn)": 56.59, - "STS22 (cmn-Hans_eng-Latn)": 65.95, - "STS22 (deu-Latn_pol-Latn)": 49.58, - "STS22 (fra-Latn_pol-Latn)": 50.71, - "STS22 (en)": 63.66, - "STS22 (ara-Arab)": 56.95, - "STS22 (spa-Latn_ita-Latn)": 68.92, - "STS22 (tur-Latn)": 63.56, - "STS22 (deu-Latn_fra-Latn)": 67.96, - "STS22 (ita-Latn)": 76.99, - "STS22 (cmn-Hans)": 66.82, - "STS22 (rus-Cyrl)": 59.89, - "STS22 (fra-Latn)": 76.77, - "STS22 (pol-Latn_eng-Latn)": 65.54, - "STS22 (deu-Latn)": 56.58, - "STS22 (pol-Latn)": 34.65, - "STS22 (zh)": 65.64, - "STS22 (pl)": 34.66, - "STSB (cmn-Hans)": 81.08, - "STSB": 81.08, - "STSBenchmark": 87.29, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 81.22, - "STSBenchmarkMultilingualSTS (en)": 87.29, - "STSBenchmarkMultilingualSTS (pol-Latn)": 81.06, - "STSBenchmarkMultilingualSTS (nld-Latn)": 81.63, - "STSBenchmarkMultilingualSTS (ita-Latn)": 81.75, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 83.05, - "STSBenchmarkMultilingualSTS (por-Latn)": 73.31, - "STSBenchmarkMultilingualSTS (spa-Latn)": 83.81, - "STSBenchmarkMultilingualSTS (fra-Latn)": 83.28, - "STSBenchmarkMultilingualSTS (deu-Latn)": 84.27, - "STSBenchmarkMultilingualSTS (fr)": 82.53 + "Model": "dragon-plus" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": 
"dragon-plus" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "dragon-plus", + "ARCChallenge": 8.91, + "AlphaNLI": 32.1, + "HellaSwag": 27.69, + "PIQA": 28.01, + "Quail": 4.09, + "RARbCode": 17.58, + "RARbMath": 45.09, + "SIQA": 2.0, + "SpartQA": 10.34, + "TempReasonL1": 1.82, + "TempReasonL2Fact": 17.45, + "TempReasonL2Pure": 0.55, + "TempReasonL3Fact": 15.71, + "TempReasonL3Pure": 7.97, + "WinoGrande": 67.18 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "dragon-plus" } ] }, "Summarization": { "spearman": [ { - "Model": "multilingual-e5-large", - "SummEval": 29.65, - "SummEvalFr (fra-Latn)": 30.92, - "SummEvalFr": 30.92 + "Model": "dragon-plus" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "multilingual-e5-large" + "Model": "dragon-plus" } ] } }, - "komninos": { + "bge-small-zh-v1.5": { "BitextMining": { "f1": [ { - "Model": "komninos", - "BUCC (de-en)": 0.18, - "BUCC (fr-en)": 0.08, - "BUCC (ru-en)": 0.15, - "BUCC (zh-en)": 0.05, - "Tatoeba (afr-eng)": 4.82, - "Tatoeba (amh-eng)": 1.18, - "Tatoeba (ang-eng)": 8.54, - "Tatoeba (ara-eng)": 0.63, - "Tatoeba (arq-eng)": 0.4, - "Tatoeba (arz-eng)": 0.63, - "Tatoeba (ast-eng)": 11.69, - "Tatoeba (awa-eng)": 0.0, - "Tatoeba (aze-eng)": 3.22, - "Tatoeba (bel-eng)": 1.75, - "Tatoeba (ben-eng)": 0.2, - "Tatoeba (ber-eng)": 7.0, - "Tatoeba (bos-eng)": 9.31, - "Tatoeba (bre-eng)": 4.17, - "Tatoeba (bul-eng)": 1.29, - "Tatoeba (cat-eng)": 7.73, - "Tatoeba (cbk-eng)": 5.61, - "Tatoeba (ceb-eng)": 4.88, - "Tatoeba (ces-eng)": 3.55, - "Tatoeba (cha-eng)": 19.29, - "Tatoeba (cmn-eng)": 0.5, - "Tatoeba (cor-eng)": 4.15, - "Tatoeba (csb-eng)": 5.69, - "Tatoeba (cym-eng)": 8.4, - "Tatoeba (dan-eng)": 6.99, - "Tatoeba (deu-eng)": 3.67, - "Tatoeba (dsb-eng)": 5.33, - "Tatoeba (dtp-eng)": 4.25, - "Tatoeba (ell-eng)": 0.63, - "Tatoeba (epo-eng)": 2.45, - "Tatoeba (est-eng)": 2.69, - "Tatoeba (eus-eng)": 4.69, - "Tatoeba (fao-eng)": 7.61, - "Tatoeba (fin-eng)": 3.36, - "Tatoeba (fra-eng)": 7.0, - "Tatoeba (fry-eng)": 12.36, - "Tatoeba (gla-eng)": 3.07, - "Tatoeba (gle-eng)": 4.81, - "Tatoeba (glg-eng)": 8.12, - "Tatoeba (gsw-eng)": 18.87, - "Tatoeba (heb-eng)": 0.68, - "Tatoeba (hin-eng)": 0.1, - "Tatoeba (hrv-eng)": 5.41, - "Tatoeba (hsb-eng)": 6.32, - "Tatoeba (hun-eng)": 3.42, - "Tatoeba (hye-eng)": 0.97, - "Tatoeba (ido-eng)": 7.1, - "Tatoeba (ile-eng)": 13.61, - "Tatoeba (ina-eng)": 8.57, - "Tatoeba (ind-eng)": 7.26, - "Tatoeba (isl-eng)": 4.09, - "Tatoeba (ita-eng)": 5.54, - "Tatoeba (jav-eng)": 11.43, - "Tatoeba (jpn-eng)": 0.2, - "Tatoeba (kab-eng)": 2.71, - "Tatoeba (kat-eng)": 1.11, - "Tatoeba (kaz-eng)": 1.17, - "Tatoeba (khm-eng)": 0.55, - "Tatoeba (kor-eng)": 0.5, - "Tatoeba (kur-eng)": 8.55, - "Tatoeba (kzj-eng)": 4.61, - "Tatoeba (lat-eng)": 4.07, - "Tatoeba (lfn-eng)": 2.83, - "Tatoeba (lit-eng)": 0.95, - "Tatoeba (lvs-eng)": 3.25, - "Tatoeba (mal-eng)": 0.29, - "Tatoeba (mar-eng)": 0.2, - "Tatoeba (max-eng)": 14.53, - "Tatoeba (mhr-eng)": 0.2, - "Tatoeba (mkd-eng)": 0.2, - "Tatoeba (mon-eng)": 1.1, - "Tatoeba (nds-eng)": 10.37, - "Tatoeba (nld-eng)": 9.5, - "Tatoeba (nno-eng)": 4.49, - "Tatoeba (nob-eng)": 4.95, - "Tatoeba (nov-eng)": 14.53, - "Tatoeba (oci-eng)": 5.8, - "Tatoeba (orv-eng)": 0.24, - "Tatoeba (pam-eng)": 6.65, - "Tatoeba (pes-eng)": 0.5, - "Tatoeba (pms-eng)": 8.05, - "Tatoeba (pol-eng)": 5.13, - "Tatoeba (por-eng)": 5.87, - "Tatoeba (ron-eng)": 6.76, - "Tatoeba (rus-eng)": 0.2, - "Tatoeba (slk-eng)": 4.23, - "Tatoeba (slv-eng)": 6.05, - "Tatoeba (spa-eng)": 5.03, - "Tatoeba (sqi-eng)": 4.36, - 
"Tatoeba (srp-eng)": 1.77, - "Tatoeba (swe-eng)": 6.72, - "Tatoeba (swg-eng)": 8.54, - "Tatoeba (swh-eng)": 11.49, - "Tatoeba (tam-eng)": 1.3, - "Tatoeba (tat-eng)": 0.77, - "Tatoeba (tel-eng)": 0.85, - "Tatoeba (tgl-eng)": 2.61, - "Tatoeba (tha-eng)": 0.69, - "Tatoeba (tuk-eng)": 5.76, - "Tatoeba (tur-eng)": 5.24, - "Tatoeba (tzl-eng)": 15.51, - "Tatoeba (uig-eng)": 0.6, - "Tatoeba (ukr-eng)": 1.23, - "Tatoeba (urd-eng)": 0.4, - "Tatoeba (uzb-eng)": 4.73, - "Tatoeba (vie-eng)": 6.55, - "Tatoeba (war-eng)": 4.12, - "Tatoeba (wuu-eng)": 0.2, - "Tatoeba (xho-eng)": 4.33, - "Tatoeba (yid-eng)": 0.59, - "Tatoeba (yue-eng)": 0.5, - "Tatoeba (zsm-eng)": 7.27 + "Model": "bge-small-zh-v1.5" } ] }, "Classification": { "accuracy": [ { - "Model": "komninos", - "AmazonCounterfactualClassification (en)": 60.54, - "AmazonPolarityClassification": 59.59, - "AmazonReviewsClassification (en)": 31.01, - "Banking77Classification": 67.05, - "EmotionClassification": 33.18, - "ImdbClassification": 63.98, - "MTOPDomainClassification (en)": 78.57, - "MTOPIntentClassification (en)": 57.07, - "MassiveIntentClassification (en)": 57.21, - "MassiveScenarioClassification (en)": 66.11, - "ToxicConversationsClassification": 67.76, - "TweetSentimentExtractionClassification": 49.68 + "Model": "bge-small-zh-v1.5", + "AmazonReviewsClassification (zh)": 35.91, + "IFlyTek": 45.49, + "JDReview": 80.04, + "MassiveIntentClassification (zh-CN)": 63.95, + "MassiveScenarioClassification (zh-CN)": 70.8, + "MultilingualSentiment": 63.06, + "OnlineShopping": 85.05, + "TNews": 48.15, + "Waimai": 83.18 } ] }, "Clustering": { "v_measure": [ { - "Model": "komninos", - "ArxivClusteringP2P": 34.73, - "ArxivClusteringS2S": 26.01, - "BiorxivClusteringP2P": 29.76, - "BiorxivClusteringS2S": 20.71, - "BlurbsClusteringP2P": 11.37, - "BlurbsClusteringS2S": 8.01, - "MedrxivClusteringP2P": 26.65, - "MedrxivClusteringS2S": 21.5, - "RedditClustering": 28.84, - "RedditClusteringP2P": 7.37, - "StackExchangeClustering": 39.04, - "StackExchangeClusteringP2P": 30.23, - "TenKGnadClusteringP2P": 15.89, - "TenKGnadClusteringS2S": 4.84, - "TwentyNewsgroupsClustering": 27.42 + "Model": "bge-small-zh-v1.5", + "CLSClusteringP2P": 38.14, + "CLSClusteringS2S": 35.14, + "ThuNewsClusteringP2P": 54.22, + "ThuNewsClusteringS2S": 49.22 } ] }, "PairClassification": { "ap": [ { - "Model": "komninos", - "SprintDuplicateQuestions": 85.55, - "TwitterSemEval2015": 53.85, - "TwitterURLCorpus": 79.41 + "Model": "bge-small-zh-v1.5", + "Cmnli": 76.24, + "Ocnli": 64.57 } ] }, "Reranking": { "map": [ { - "Model": "komninos", - "AskUbuntuDupQuestions": 50.88, - "MindSmallReranking": 28.92, - "SciDocsRR": 63.55, - "StackOverflowDupQuestions": 35.65 + "Model": "bge-small-zh-v1.5", + "CMedQAv1": 77.4, + "CMedQAv2": 79.86, + "MMarcoReranking": 20.5, + "T2Reranking": 65.9 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "komninos", - "ArguAna": 30.96, - "CQADupstackRetrieval": 16.79, - "ClimateFEVER": 14.87, - "DBPedia": 15.88, - "FEVER": 15.56, - "FiQA2018": 10.49, - "HotpotQA": 20.77, - "MSMARCO": 9.75, - "NFCorpus": 11.79, - "NQ": 12.75, - "QuoraRetrieval": 71.57, - "SCIDOCS": 8.47, - "SciFact": 29.53, - "TRECCOVID": 35.92, - "Touche2020": 13.17 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "komninos", - "BIOSSES": 50.25, - "SICK-R": 55.49, - "STS12": 53.51, - "STS13": 70.8, - "STS14": 63.56, - "STS15": 74.08, - "STS16": 64.6, - "STS17 (ar-ar)": 13.78, - "STS17 (en-ar)": 9.08, - "STS17 (en-de)": -3.11, - "STS17 (en-en)": 76.91, - "STS17 (en-tr)": -0.45, - "STS17 (es-en)": 
-8.18, - "STS17 (es-es)": 48.23, - "STS17 (fr-en)": 5.81, - "STS17 (it-en)": 3.64, - "STS17 (ko-ko)": 2.54, - "STS17 (nl-en)": 0.44, - "STS22 (ar)": 32.42, - "STS22 (de)": 33.04, - "STS22 (de-en)": 28.65, - "STS22 (de-fr)": 14.77, - "STS22 (de-pl)": 11.21, - "STS22 (en)": 53.89, - "STS22 (es)": 48.53, - "STS22 (es-en)": 26.97, - "STS22 (es-it)": 41.1, - "STS22 (fr)": 49.43, - "STS22 (fr-pl)": 39.44, - "STS22 (it)": 57.77, - "STS22 (pl)": 12.47, - "STS22 (pl-en)": 45.55, - "STS22 (ru)": 19.44, - "STS22 (tr)": 47.38, - "STS22 (zh)": 4.78, - "STS22 (zh-en)": 14.05, - "STSBenchmark": 61.55 + "Model": "bge-small-zh-v1.5", + "CmedqaRetrieval": 35.11, + "CovidRetrieval": 70.14, + "DuRetrieval": 77.28, + "EcomRetrieval": 55.71, + "MMarcoRetrieval": 63.48, + "MedicalRetrieval": 49.8, + "T2Retrieval": 76.43, + "VideoRetrieval": 66.19 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-small-zh-v1.5", + "AFQMC": 33.42, + "ATEC": 43.01, + "BQ": 55.22, + "LCQMC": 72.19, + "PAWSX": 9.26, + "QBQTC": 35.29, + "STS22 (zh)": 67.72, + "STSB": 76.73 } ] }, "Summarization": { "spearman": [ { - "Model": "komninos", - "SummEval": 30.49 + "Model": "bge-small-zh-v1.5" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "komninos" + "Model": "bge-small-zh-v1.5" } ] } }, - "voyage-lite-01-instruct": { + "nb-bert-large": { "BitextMining": { "f1": [ { - "Model": "voyage-lite-01-instruct" + "Model": "nb-bert-large", + "BornholmBitextMining": 4.53 } ] }, "Classification": { "accuracy": [ { - "Model": "voyage-lite-01-instruct", - "AmazonCounterfactualClassification (en)": 71.43, - "AmazonPolarityClassification": 96.41, - "AmazonReviewsClassification (en)": 57.06, - "Banking77Classification": 81.64, - "EmotionClassification": 48.29, - "ImdbClassification": 95.49, - "MTOPDomainClassification (en)": 96.3, - "MTOPIntentClassification (en)": 67.93, - "MassiveIntentClassification (en)": 71.29, - "MassiveScenarioClassification (en)": 76.74, - "ToxicConversationsClassification": 75.45, - "TweetSentimentExtractionClassification": 59.44 + "Model": "nb-bert-large", + "AngryTweetsClassification": 52.14, + "DKHateClassification": 62.13, + "DanishPoliticalCommentsClassification": 35.04, + "LccSentimentClassification": 56.27, + "MassiveIntentClassification (da)": 57.03, + "MassiveIntentClassification (nb)": 62.68, + "MassiveIntentClassification (sv)": 55.02, + "MassiveScenarioClassification (da)": 60.43, + "MassiveScenarioClassification (nb)": 67.44, + "MassiveScenarioClassification (sv)": 57.12, + "NoRecClassification": 55.46, + "NordicLangClassification": 85.27, + "NorwegianParliament": 62.58, + "ScalaDaClassification": 62.85, + "ScalaNbClassification": 66.97 } ] }, "Clustering": { "v_measure": [ { - "Model": "voyage-lite-01-instruct", - "ArxivClusteringP2P": 47.92, - "ArxivClusteringS2S": 42.42, - "BiorxivClusteringP2P": 38.72, - "BiorxivClusteringS2S": 36.6, - "MedrxivClusteringP2P": 34.04, - "MedrxivClusteringS2S": 32.81, - "RedditClustering": 61.56, - "RedditClusteringP2P": 65.35, - "StackExchangeClustering": 70.16, - "StackExchangeClusteringP2P": 38.23, - "TwentyNewsgroupsClustering": 53.56 + "Model": "nb-bert-large" } ] }, "PairClassification": { "ap": [ { - "Model": "voyage-lite-01-instruct", - "SprintDuplicateQuestions": 96.01, - "TwitterSemEval2015": 76.87, - "TwitterURLCorpus": 86.84 + "Model": "nb-bert-large" } ] }, "Reranking": { "map": [ { - "Model": "voyage-lite-01-instruct", - "AskUbuntuDupQuestions": 65.77, - "MindSmallReranking": 31.69, - "SciDocsRR": 87.03, - "StackOverflowDupQuestions": 54.49 + 
"Model": "nb-bert-large" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "voyage-lite-01-instruct", - "ArguAna": 58.73, - "CQADupstackRetrieval": 45.11, - "ClimateFEVER": 37.47, - "DBPedia": 43.42, - "FEVER": 89.71, - "FiQA2018": 44.79, - "HotpotQA": 70.46, - "MSMARCO": 39.66, - "NFCorpus": 43.33, - "NQ": 60.65, - "QuoraRetrieval": 87.83, - "SCIDOCS": 23.19, - "SciFact": 73.64, - "TRECCOVID": 78.92, - "Touche2020": 36.83 + "Model": "nb-bert-large" } ] }, "STS": { "spearman": [ { - "Model": "voyage-lite-01-instruct", - "BIOSSES": 84.85, - "SICK-R": 79.71, - "STS12": 77.09, - "STS13": 88.91, - "STS14": 82.08, - "STS15": 89.21, - "STS16": 84.74, - "STS17 (en-en)": 90.73, - "STS22 (en)": 62.1, - "STSBenchmark": 89.86 + "Model": "nb-bert-large" } ] }, "Summarization": { "spearman": [ { - "Model": "voyage-lite-01-instruct", - "SummEval": 30.97 + "Model": "nb-bert-large" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "voyage-lite-01-instruct" + "Model": "nb-bert-large" } ] } }, - "bm25": { + "e5-large-v2": { "BitextMining": { "f1": [ { - "Model": "bm25" + "Model": "e5-large-v2" } ] }, "Classification": { "accuracy": [ { - "Model": "bm25" + "Model": "e5-large-v2" } ] }, "Clustering": { "v_measure": [ { - "Model": "bm25" + "Model": "e5-large-v2", + "BiorxivClusteringP2P": 36.72, + "BiorxivClusteringS2S": 35.47, + "MedrxivClusteringP2P": 31.45, + "MedrxivClusteringS2S": 29.91, + "RedditClustering": 55.5, + "RedditClusteringP2P": 63.71, + "StackExchangeClustering": 65.23, + "StackExchangeClusteringP2P": 33.62, + "TwentyNewsgroupsClustering": 48.73 } ] }, "PairClassification": { "ap": [ { - "Model": "bm25" + "Model": "e5-large-v2" } ] }, "Reranking": { "map": [ { - "Model": "bm25" + "Model": "e5-large-v2" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bm25", - "BrightRetrieval (robotics)": 13.53, - "BrightRetrieval (pony)": 7.93, - "BrightRetrieval (leetcode)": 24.37, - "BrightRetrieval (earth_science)": 27.06, - "BrightRetrieval (stackoverflow)": 16.55, - "BrightRetrieval (economics)": 14.87, - "BrightRetrieval (theoremqa_questions)": 9.78, - "BrightRetrieval (theoremqa_theorems)": 4.25, - "BrightRetrieval (psychology)": 12.51, - "BrightRetrieval (sustainable_living)": 15.22, - "BrightRetrieval (biology)": 19.19, - "BrightRetrieval (aops)": 6.2 + "Model": "e5-large-v2" } ] }, "STS": { "spearman": [ { - "Model": "bm25" + "Model": "e5-large-v2" } ] }, "Summarization": { "spearman": [ { - "Model": "bm25" + "Model": "e5-large-v2" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "bm25", - "Core17InstructionRetrieval": -1.06, - "News21InstructionRetrieval": -2.15, - "Robust04InstructionRetrieval": -3.06 + "Model": "e5-large-v2", + "Core17InstructionRetrieval": 0.12, + "News21InstructionRetrieval": 0.87, + "Robust04InstructionRetrieval": -4.16 } ] } }, - "nomic-embed-text-v1.5-256": { + "universal-sentence-encoder-multilingual-3": { "BitextMining": { "f1": [ { - "Model": "nomic-embed-text-v1.5-256" + "Model": "universal-sentence-encoder-multilingual-3" } ] }, "Classification": { "accuracy": [ { - "Model": "nomic-embed-text-v1.5-256", - "AmazonCounterfactualClassification (en)": 72.94, - "AmazonPolarityClassification": 91.35, - "AmazonReviewsClassification (en)": 45.73, - "Banking77Classification": 83.69, - "EmotionClassification": 45.88, - "ImdbClassification": 83.99, - "MTOPDomainClassification (en)": 91.68, - "MTOPIntentClassification (en)": 72.47, - "MassiveIntentClassification (en)": 71.76, - "MassiveScenarioClassification (en)": 75.67, - "ToxicConversationsClassification": 
70.87, - "TweetSentimentExtractionClassification": 59.2 + "Model": "universal-sentence-encoder-multilingual-3", + "AmazonReviewsClassification (fr)": 33.51, + "MTOPDomainClassification (fr)": 85.5, + "MTOPIntentClassification (fr)": 53.98, + "MasakhaNEWSClassification (fra)": 82.06, + "MassiveIntentClassification (fr)": 61.19, + "MassiveScenarioClassification (fr)": 70.22 } ] }, "Clustering": { "v_measure": [ { - "Model": "nomic-embed-text-v1.5-256", - "ArxivClusteringP2P": 44.82, - "ArxivClusteringS2S": 35.32, - "BiorxivClusteringP2P": 38.19, - "BiorxivClusteringS2S": 31.83, - "MedrxivClusteringP2P": 34.08, - "MedrxivClusteringS2S": 30.98, - "RedditClustering": 54.92, - "RedditClusteringP2P": 60.23, - "StackExchangeClustering": 61.81, - "StackExchangeClusteringP2P": 34.03, - "TwentyNewsgroupsClustering": 48.56 + "Model": "universal-sentence-encoder-multilingual-3", + "AlloProfClusteringP2P": 56.9, + "AlloProfClusteringS2S": 37.84, + "HALClusteringS2S": 18.95, + "MLSUMClusteringP2P": 43.9, + "MLSUMClusteringS2S": 35.5, + "MasakhaNEWSClusteringP2P (fra)": 60.57, + "MasakhaNEWSClusteringS2S (fra)": 40.31 } ] }, "PairClassification": { "ap": [ { - "Model": "nomic-embed-text-v1.5-256", - "SprintDuplicateQuestions": 92.31, - "TwitterSemEval2015": 73.61, - "TwitterURLCorpus": 86.34 + "Model": "universal-sentence-encoder-multilingual-3", + "OpusparcusPC (fr)": 91.46, + "PawsXPairClassification (fr)": 52.39 } ] }, "Reranking": { "map": [ { - "Model": "nomic-embed-text-v1.5-256", - "AskUbuntuDupQuestions": 61.34, - "MindSmallReranking": 30.04, - "SciDocsRR": 79.4, - "StackOverflowDupQuestions": 49.95 + "Model": "universal-sentence-encoder-multilingual-3", + "AlloprofReranking": 56.23, + "SyntecReranking": 73.85 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "nomic-embed-text-v1.5-256", - "ArguAna": 45.44, - "CQADupstackRetrieval": 37.61, - "ClimateFEVER": 39.63, - "DBPedia": 39.42, - "FEVER": 84.4, - "FiQA2018": 35.0, - "HotpotQA": 67.78, - "MSMARCO": 41.38, - "NFCorpus": 32.54, - "NQ": 57.1, - "QuoraRetrieval": 87.65, - "SCIDOCS": 16.76, - "SciFact": 68.24, - "TRECCOVID": 80.65, - "Touche2020": 28.49 + "Model": "universal-sentence-encoder-multilingual-3", + "AlloprofRetrieval": 35.27, + "BSARDRetrieval": 0.0, + "MintakaRetrieval (fr)": 26.12, + "SyntecRetrieval": 69.82, + "XPQARetrieval (fr)": 59.59 } - ] - }, - "STS": { - "spearman": [ - { - "Model": "nomic-embed-text-v1.5-256", - "BIOSSES": 81.58, - "SICK-R": 79.24, - "STS12": 78.16, - "STS13": 86.01, - "STS14": 81.25, - "STS15": 86.51, - "STS16": 84.24, - "STS17 (en-en)": 86.44, - "STS22 (en)": 65.14, - "STSBenchmark": 84.8 + ] + }, + "STS": { + "spearman": [ + { + "Model": "universal-sentence-encoder-multilingual-3", + "SICKFr": 71.37, + "STS22 (fr)": 77.91, + "STSBenchmarkMultilingualSTS (fr)": 75.48 } ] }, "Summarization": { "spearman": [ { - "Model": "nomic-embed-text-v1.5-256", - "SummEval": 30.05 + "Model": "universal-sentence-encoder-multilingual-3", + "SummEvalFr": 28.21 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "nomic-embed-text-v1.5-256" + "Model": "universal-sentence-encoder-multilingual-3" } ] } }, - "e5-mistral-7b-instruct-noinstruct": { + "LaBSE-ru-turbo": { "BitextMining": { "f1": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo", + "Tatoeba (rus-Cyrl_eng-Latn)": 93.22 } ] }, "Classification": { "accuracy": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo", + "GeoreviewClassification (rus-Cyrl)": 46.04, + "HeadlineClassification (rus-Cyrl)": 69.98, + 
"InappropriatenessClassification (rus-Cyrl)": 61.39, + "KinopoiskClassification (rus-Cyrl)": 53.59, + "MassiveIntentClassification (rus-Cyrl)": 66.08, + "MassiveScenarioClassification (rus-Cyrl)": 71.13, + "RuReviewsClassification (rus-Cyrl)": 64.58, + "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.67, + "RuSciBenchOECDClassification (rus-Cyrl)": 43.58 } ] }, "Clustering": { "v_measure": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo", + "GeoreviewClusteringP2P (rus-Cyrl)": 64.55, + "MLSUMClusteringP2P (rus-Cyrl)": 45.7, + "MLSUMClusteringS2S (rus-Cyrl)": 42.93, + "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 50.64, + "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.48 } ] }, "PairClassification": { "ap": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo", + "OpusparcusPC (rus-Cyrl)": 89.32, + "TERRa (rus-Cyrl)": 57.81 } ] }, "Reranking": { "map": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo", + "RuBQReranking (rus-Cyrl)": 68.65 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "e5-mistral-7b-instruct-noinstruct", - "ARCChallenge": 20.48, - "AlphaNLI": 18.88, - "HellaSwag": 32.25, - "PIQA": 32.8, - "Quail": 6.25, - "RARbCode": 79.84, - "RARbMath": 76.19, - "SIQA": 5.08, - "SpartQA": 10.87, - "TempReasonL1": 3.04, - "TempReasonL2Fact": 35.63, - "TempReasonL2Pure": 9.32, - "TempReasonL3Fact": 30.41, - "TempReasonL3Pure": 14.39, - "WinoGrande": 45.18 + "Model": "LaBSE-ru-turbo", + "RiaNewsRetrieval (rus-Cyrl)": 69.36, + "RuBQRetrieval (rus-Cyrl)": 65.71 } ] }, "STS": { "spearman": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo", + "RUParaPhraserSTS (rus-Cyrl)": 72.97, + "RuSTSBenchmarkSTS (rus-Cyrl)": 81.77, + "STS22 (rus-Cyrl)": 62.89, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81 } ] }, "Summarization": { "spearman": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "e5-mistral-7b-instruct-noinstruct" + "Model": "LaBSE-ru-turbo" } ] } }, - "e5-base-4k": { + "bge-large-zh-noinstruct": { "BitextMining": { "f1": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct" } ] }, "Classification": { "accuracy": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct", + "AmazonReviewsClassification (zh)": 41.94, + "IFlyTek": 45.32, + "JDReview": 85.38, + "MassiveIntentClassification (zh-CN)": 66.96, + "MassiveScenarioClassification (zh-CN)": 73.39, + "MultilingualSentiment": 73.7, + "OnlineShopping": 91.66, + "TNews": 52.05, + "Waimai": 86.83 } ] }, "Clustering": { "v_measure": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct", + "CLSClusteringP2P": 41.23, + "CLSClusteringS2S": 40.04, + "ThuNewsClusteringP2P": 62.03, + "ThuNewsClusteringS2S": 56.75 } ] }, "PairClassification": { "ap": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct", + "Cmnli": 82.17, + "Ocnli": 71.37 } ] }, "Reranking": { "map": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct", + "CMedQAv1": 81.72, + "CMedQAv2": 84.64, + "MMarcoReranking": 27.1, + "T2Reranking": 66.16 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "e5-base-4k", - "LEMBNarrativeQARetrieval": 30.35, - "LEMBNeedleRetrieval": 41.5, - "LEMBPasskeyRetrieval": 67.25, - "LEMBQMSumRetrieval": 35.6, - "LEMBSummScreenFDRetrieval": 95.23, - "LEMBWikimQARetrieval": 69.19 + "Model": "bge-large-zh-noinstruct", + "CmedqaRetrieval": 41.03, + "CovidRetrieval": 75.07, + "DuRetrieval": 84.68, + "EcomRetrieval": 
65.6, + "MMarcoRetrieval": 81.38, + "MedicalRetrieval": 58.28, + "T2Retrieval": 84.39, + "VideoRetrieval": 73.93 } ] }, "STS": { "spearman": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct", + "AFQMC": 43.06, + "ATEC": 48.29, + "BQ": 60.53, + "LCQMC": 74.71, + "PAWSX": 16.64, + "QBQTC": 35.2, + "STS22 (zh)": 67.19, + "STSB": 78.41 } ] }, "Summarization": { "spearman": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "e5-base-4k" + "Model": "bge-large-zh-noinstruct" } ] } }, - "sentence-t5-xl": { + "bm25s": { "BitextMining": { - "f1": [ - { - "Model": "sentence-t5-xl", - "BUCC (de-en)": 95.04, - "BUCC (fr-en)": 94.96, - "BUCC (ru-en)": 8.33, - "BUCC (zh-en)": 1.3, - "Tatoeba (afr-eng)": 41.84, - "Tatoeba (amh-eng)": 0.03, - "Tatoeba (ang-eng)": 37.87, - "Tatoeba (ara-eng)": 0.61, - "Tatoeba (arq-eng)": 0.74, - "Tatoeba (arz-eng)": 0.42, - "Tatoeba (ast-eng)": 65.41, - "Tatoeba (awa-eng)": 1.46, - "Tatoeba (aze-eng)": 8.79, - "Tatoeba (bel-eng)": 5.76, - "Tatoeba (ben-eng)": 0.01, - "Tatoeba (ber-eng)": 5.92, - "Tatoeba (bos-eng)": 16.12, - "Tatoeba (bre-eng)": 6.12, - "Tatoeba (bul-eng)": 9.06, - "Tatoeba (cat-eng)": 57.4, - "Tatoeba (cbk-eng)": 57.68, - "Tatoeba (ceb-eng)": 12.56, - "Tatoeba (ces-eng)": 9.47, - "Tatoeba (cha-eng)": 27.13, - "Tatoeba (cmn-eng)": 1.82, - "Tatoeba (cor-eng)": 3.87, - "Tatoeba (csb-eng)": 14.41, - "Tatoeba (cym-eng)": 6.69, - "Tatoeba (dan-eng)": 54.87, - "Tatoeba (deu-eng)": 93.72, - "Tatoeba (dsb-eng)": 14.74, - "Tatoeba (dtp-eng)": 5.84, - "Tatoeba (ell-eng)": 0.6, - "Tatoeba (epo-eng)": 30.8, - "Tatoeba (est-eng)": 5.39, - "Tatoeba (eus-eng)": 11.9, - "Tatoeba (fao-eng)": 28.08, - "Tatoeba (fin-eng)": 6.81, - "Tatoeba (fra-eng)": 85.29, - "Tatoeba (fry-eng)": 38.68, - "Tatoeba (gla-eng)": 2.96, - "Tatoeba (gle-eng)": 3.74, - "Tatoeba (glg-eng)": 70.0, - "Tatoeba (gsw-eng)": 30.49, - "Tatoeba (heb-eng)": 0.87, - "Tatoeba (hin-eng)": 0.1, - "Tatoeba (hrv-eng)": 17.43, - "Tatoeba (hsb-eng)": 14.69, - "Tatoeba (hun-eng)": 7.28, - "Tatoeba (hye-eng)": 0.77, - "Tatoeba (ido-eng)": 46.65, - "Tatoeba (ile-eng)": 59.43, - "Tatoeba (ina-eng)": 82.71, - "Tatoeba (ind-eng)": 37.26, - "Tatoeba (isl-eng)": 11.21, - "Tatoeba (ita-eng)": 79.77, - "Tatoeba (jav-eng)": 7.81, - "Tatoeba (jpn-eng)": 0.91, - "Tatoeba (kab-eng)": 2.23, - "Tatoeba (kat-eng)": 1.48, - "Tatoeba (kaz-eng)": 1.77, - "Tatoeba (khm-eng)": 0.38, - "Tatoeba (kor-eng)": 1.96, - "Tatoeba (kur-eng)": 12.11, - "Tatoeba (kzj-eng)": 6.13, - "Tatoeba (lat-eng)": 27.84, - "Tatoeba (lfn-eng)": 45.89, - "Tatoeba (lit-eng)": 5.94, - "Tatoeba (lvs-eng)": 8.11, - "Tatoeba (mal-eng)": 0.59, - "Tatoeba (mar-eng)": 0.03, - "Tatoeba (max-eng)": 21.7, - "Tatoeba (mhr-eng)": 0.68, - "Tatoeba (mkd-eng)": 5.92, - "Tatoeba (mon-eng)": 2.39, - "Tatoeba (nds-eng)": 45.04, - "Tatoeba (nld-eng)": 64.75, - "Tatoeba (nno-eng)": 36.74, - "Tatoeba (nob-eng)": 54.77, - "Tatoeba (nov-eng)": 57.12, - "Tatoeba (oci-eng)": 34.39, - "Tatoeba (orv-eng)": 2.04, - "Tatoeba (pam-eng)": 8.34, - "Tatoeba (pes-eng)": 0.87, - "Tatoeba (pms-eng)": 38.06, - "Tatoeba (pol-eng)": 28.35, - "Tatoeba (por-eng)": 83.61, - "Tatoeba (ron-eng)": 65.27, - "Tatoeba (rus-eng)": 30.42, - "Tatoeba (slk-eng)": 13.19, - "Tatoeba (slv-eng)": 13.49, - "Tatoeba (spa-eng)": 89.18, - "Tatoeba (sqi-eng)": 14.66, - "Tatoeba (srp-eng)": 13.24, - "Tatoeba (swe-eng)": 60.67, - "Tatoeba (swg-eng)": 34.76, - "Tatoeba (swh-eng)": 8.07, - "Tatoeba (tam-eng)": 0.36, - "Tatoeba (tat-eng)": 
1.46, - "Tatoeba (tel-eng)": 0.67, - "Tatoeba (tgl-eng)": 25.22, - "Tatoeba (tha-eng)": 1.58, - "Tatoeba (tuk-eng)": 4.99, - "Tatoeba (tur-eng)": 7.72, - "Tatoeba (tzl-eng)": 38.49, - "Tatoeba (uig-eng)": 0.87, - "Tatoeba (ukr-eng)": 9.12, - "Tatoeba (urd-eng)": 0.0, - "Tatoeba (uzb-eng)": 5.48, - "Tatoeba (vie-eng)": 8.45, - "Tatoeba (war-eng)": 13.75, - "Tatoeba (wuu-eng)": 1.44, - "Tatoeba (xho-eng)": 9.15, - "Tatoeba (yid-eng)": 0.28, - "Tatoeba (yue-eng)": 0.98, - "Tatoeba (zsm-eng)": 35.71 + "f1": [ + { + "Model": "bm25s" } ] }, "Classification": { "accuracy": [ { - "Model": "sentence-t5-xl", - "AmazonCounterfactualClassification (de)": 67.01, - "AmazonCounterfactualClassification (en)": 76.01, - "AmazonCounterfactualClassification (en-ext)": 77.29, - "AmazonCounterfactualClassification (ja)": 45.61, - "AmazonPolarityClassification": 93.17, - "AmazonReviewsClassification (de)": 44.05, - "AmazonReviewsClassification (en)": 48.18, - "AmazonReviewsClassification (es)": 45.01, - "AmazonReviewsClassification (fr)": 43.52, - "AmazonReviewsClassification (ja)": 22.23, - "AmazonReviewsClassification (zh)": 21.88, - "Banking77Classification": 80.88, - "EmotionClassification": 51.95, - "ImdbClassification": 87.54, - "MTOPDomainClassification (de)": 83.28, - "MTOPDomainClassification (en)": 90.73, - "MTOPDomainClassification (es)": 85.32, - "MTOPDomainClassification (fr)": 85.14, - "MTOPDomainClassification (hi)": 20.85, - "MTOPDomainClassification (th)": 15.62, - "MTOPIntentClassification (de)": 54.65, - "MTOPIntentClassification (en)": 68.15, - "MTOPIntentClassification (es)": 57.38, - "MTOPIntentClassification (fr)": 54.39, - "MTOPIntentClassification (hi)": 3.28, - "MTOPIntentClassification (th)": 5.08, - "MasakhaNEWSClassification (fra)": 80.09, - "MassiveIntentClassification (af)": 40.17, - "MassiveIntentClassification (am)": 2.18, - "MassiveIntentClassification (ar)": 4.18, - "MassiveIntentClassification (az)": 30.02, - "MassiveIntentClassification (bn)": 2.6, - "MassiveIntentClassification (cy)": 29.15, - "MassiveIntentClassification (da)": 47.69, - "MassiveIntentClassification (de)": 57.43, - "MassiveIntentClassification (el)": 9.96, - "MassiveIntentClassification (en)": 72.09, - "MassiveIntentClassification (es)": 57.97, - "MassiveIntentClassification (fa)": 3.6, - "MassiveIntentClassification (fi)": 34.02, - "MassiveIntentClassification (fr)": 60.99, - "MassiveIntentClassification (he)": 2.51, - "MassiveIntentClassification (hi)": 3.02, - "MassiveIntentClassification (hu)": 31.66, - "MassiveIntentClassification (hy)": 3.32, - "MassiveIntentClassification (id)": 41.53, - "MassiveIntentClassification (is)": 30.25, - "MassiveIntentClassification (it)": 56.57, - "MassiveIntentClassification (ja)": 3.5, - "MassiveIntentClassification (jv)": 31.67, - "MassiveIntentClassification (ka)": 2.79, - "MassiveIntentClassification (km)": 5.43, - "MassiveIntentClassification (kn)": 2.79, - "MassiveIntentClassification (ko)": 2.67, - "MassiveIntentClassification (lv)": 34.25, - "MassiveIntentClassification (ml)": 2.98, - "MassiveIntentClassification (mn)": 20.99, - "MassiveIntentClassification (ms)": 37.43, - "MassiveIntentClassification (my)": 4.02, - "MassiveIntentClassification (nb)": 45.91, - "MassiveIntentClassification (nl)": 50.51, - "MassiveIntentClassification (pl)": 43.95, - "MassiveIntentClassification (pt)": 57.95, - "MassiveIntentClassification (ro)": 49.37, - "MassiveIntentClassification (ru)": 33.46, - "MassiveIntentClassification (sl)": 36.33, - "MassiveIntentClassification (sq)": 
37.65, - "MassiveIntentClassification (sv)": 46.35, - "MassiveIntentClassification (sw)": 30.6, - "MassiveIntentClassification (ta)": 1.79, - "MassiveIntentClassification (te)": 2.26, - "MassiveIntentClassification (th)": 4.02, - "MassiveIntentClassification (tl)": 38.92, - "MassiveIntentClassification (tr)": 32.05, - "MassiveIntentClassification (ur)": 2.7, - "MassiveIntentClassification (vi)": 21.47, - "MassiveIntentClassification (zh-CN)": 0.59, - "MassiveIntentClassification (zh-TW)": 3.24, - "MassiveScenarioClassification (af)": 50.81, - "MassiveScenarioClassification (am)": 6.95, - "MassiveScenarioClassification (ar)": 12.32, - "MassiveScenarioClassification (az)": 38.79, - "MassiveScenarioClassification (bn)": 8.0, - "MassiveScenarioClassification (cy)": 33.91, - "MassiveScenarioClassification (da)": 55.79, - "MassiveScenarioClassification (de)": 65.33, - "MassiveScenarioClassification (el)": 16.89, - "MassiveScenarioClassification (en)": 73.26, - "MassiveScenarioClassification (es)": 62.52, - "MassiveScenarioClassification (fa)": 6.08, - "MassiveScenarioClassification (fi)": 43.34, - "MassiveScenarioClassification (fr)": 66.42, - "MassiveScenarioClassification (he)": 7.55, - "MassiveScenarioClassification (hi)": 7.44, - "MassiveScenarioClassification (hu)": 40.85, - "MassiveScenarioClassification (hy)": 9.25, - "MassiveScenarioClassification (id)": 51.92, - "MassiveScenarioClassification (is)": 40.09, - "MassiveScenarioClassification (it)": 62.94, - "MassiveScenarioClassification (ja)": 7.9, - "MassiveScenarioClassification (jv)": 41.33, - "MassiveScenarioClassification (ka)": 7.76, - "MassiveScenarioClassification (km)": 9.19, - "MassiveScenarioClassification (kn)": 8.36, - "MassiveScenarioClassification (ko)": 6.13, - "MassiveScenarioClassification (lv)": 40.7, - "MassiveScenarioClassification (ml)": 6.98, - "MassiveScenarioClassification (mn)": 27.0, - "MassiveScenarioClassification (ms)": 46.9, - "MassiveScenarioClassification (my)": 9.55, - "MassiveScenarioClassification (nb)": 53.43, - "MassiveScenarioClassification (nl)": 59.65, - "MassiveScenarioClassification (pl)": 49.87, - "MassiveScenarioClassification (pt)": 62.18, - "MassiveScenarioClassification (ro)": 58.22, - "MassiveScenarioClassification (ru)": 40.73, - "MassiveScenarioClassification (sl)": 43.66, - "MassiveScenarioClassification (sq)": 49.25, - "MassiveScenarioClassification (sv)": 57.17, - "MassiveScenarioClassification (sw)": 40.55, - "MassiveScenarioClassification (ta)": 7.46, - "MassiveScenarioClassification (te)": 7.03, - "MassiveScenarioClassification (th)": 8.52, - "MassiveScenarioClassification (tl)": 51.74, - "MassiveScenarioClassification (tr)": 43.01, - "MassiveScenarioClassification (ur)": 9.61, - "MassiveScenarioClassification (vi)": 28.91, - "MassiveScenarioClassification (zh-CN)": 5.86, - "MassiveScenarioClassification (zh-TW)": 7.14, - "ToxicConversationsClassification": 70.95, - "TweetSentimentExtractionClassification": 61.21 + "Model": "bm25s" } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-t5-xl", - "AlloProfClusteringP2P": 60.37, - "AlloProfClusteringS2S": 40.76, - "ArxivClusteringP2P": 41.62, - "ArxivClusteringS2S": 31.17, - "BiorxivClusteringP2P": 36.43, - "BiorxivClusteringS2S": 26.47, - "HALClusteringS2S": 20.28, - "MLSUMClusteringP2P": 41.61, - "MLSUMClusteringS2S": 33.6, - "MasakhaNEWSClusteringP2P (fra)": 62.82, - "MasakhaNEWSClusteringS2S (fra)": 31.74, - "MedrxivClusteringP2P": 32.3, - "MedrxivClusteringS2S": 26.93, - "RedditClustering": 57.03, - "RedditClusteringP2P": 
62.34, - "StackExchangeClustering": 67.13, - "StackExchangeClusteringP2P": 34.79, - "TwentyNewsgroupsClustering": 49.53 + "Model": "bm25s" } ] }, "PairClassification": { "ap": [ { - "Model": "sentence-t5-xl", - "OpusparcusPC (fr)": 92.48, - "PawsXPairClassification (fr)": 62.52, - "SprintDuplicateQuestions": 91.44, - "TwitterSemEval2015": 80.89, - "TwitterURLCorpus": 85.86 + "Model": "bm25s" } ] }, "Reranking": { "map": [ { - "Model": "sentence-t5-xl", - "AlloprofReranking": 63.3, - "AskUbuntuDupQuestions": 62.86, - "MindSmallReranking": 29.77, - "SciDocsRR": 75.16, - "StackOverflowDupQuestions": 51.05, - "SyntecReranking": 83.07 + "Model": "bm25s" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bm25s", + "ArguAna": 49.28, + "CQADupstackRetrieval": 31.86, + "ClimateFEVER": 13.62, + "DBPedia": 29.91, + "FEVER": 48.09, + "FiQA2018": 25.14, + "HotpotQA": 56.91, + "MSMARCO": 21.89, + "NFCorpus": 32.08, + "NQ": 28.5, + "QuoraRetrieval": 80.42, + "SCIDOCS": 15.78, + "SciFact": 68.7, + "TRECCOVID": 62.31, + "Touche2020": 33.05 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bm25s" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bm25s" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bm25s" + } + ] + } + }, + "GritLM-7B": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "ap": [] + }, + "Reranking": { + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "GritLM-7B", + "BrightRetrieval (pony)": 21.98, + "BrightRetrieval (robotics)": 17.31, + "BrightRetrieval (economics)": 19.0, + "BrightRetrieval (theoremqa_questions)": 23.34, + "BrightRetrieval (leetcode)": 29.85, + "BrightRetrieval (earth_science)": 32.77, + "BrightRetrieval (stackoverflow)": 11.62, + "BrightRetrieval (sustainable_living)": 18.04, + "BrightRetrieval (biology)": 25.04, + "BrightRetrieval (psychology)": 19.92, + "BrightRetrieval (theoremqa_theorems)": 19.75, + "BrightRetrieval (aops)": 8.91 + } + ], + "recall_at_1": [ + { + "Model": "GritLM-7B", + "BrightRetrieval (biology)": 37.46, + "BrightRetrieval (robotics)": 17.82, + "BrightRetrieval (pony)": 0.0, + "BrightRetrieval (sustainable_living)": 32.36, + "BrightRetrieval (psychology)": 35.35, + "BrightRetrieval (stackoverflow)": 20.08, + "BrightRetrieval (earth_science)": 39.44, + "BrightRetrieval (economics)": 25.73 + } + ] + }, + "STS": { + "spearman": [] + }, + "Summarization": { + "spearman": [] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "GritLM-7B", + "Core17InstructionRetrieval": 2.62, + "News21InstructionRetrieval": -1.01, + "Robust04InstructionRetrieval": -1.68 } ] + } + }, + "bm25": { + "BitextMining": { + "f1": [] + }, + "Classification": { + "accuracy": [] + }, + "Clustering": { + "v_measure": [] + }, + "PairClassification": { + "ap": [] + }, + "Reranking": { + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-t5-xl", - "AlloprofRetrieval": 40.38, - "ArguAna": 39.4, - "BSARDRetrieval": 0.14, - "CQADupstackRetrieval": 40.78, - "ClimateFEVER": 10.61, - "DBPedia": 33.65, - "FEVER": 36.12, - "FiQA2018": 44.71, - "HotpotQA": 37.17, - "MSMARCO": 25.17, - "MintakaRetrieval (fr)": 31.54, - "NFCorpus": 33.18, - "NQ": 46.29, - "QuoraRetrieval": 85.85, - "SCIDOCS": 15.97, - "SciFact": 50.91, - "SyntecRetrieval": 74.24, - "TRECCOVID": 54.77, - "Touche2020": 22.51, - "XPQARetrieval (fr)": 52.14 + "Model": "bm25", + "BrightRetrieval (robotics)": 13.53, + "BrightRetrieval 
(pony)": 7.93, + "BrightRetrieval (leetcode)": 24.37, + "BrightRetrieval (earth_science)": 27.06, + "BrightRetrieval (stackoverflow)": 16.55, + "BrightRetrieval (economics)": 14.87, + "BrightRetrieval (theoremqa_questions)": 9.78, + "BrightRetrieval (theoremqa_theorems)": 4.75, + "BrightRetrieval (psychology)": 12.51, + "BrightRetrieval (sustainable_living)": 15.22, + "BrightRetrieval (biology)": 19.19, + "BrightRetrieval (aops)": 6.2 } - ] - }, - "STS": { - "spearman": [ + ], + "recall_at_1": [ { - "Model": "sentence-t5-xl", - "BIOSSES": 73.12, - "SICK-R": 79.98, - "SICKFr": 75.08, - "STS12": 79.02, - "STS13": 88.8, - "STS14": 84.33, - "STS15": 88.89, - "STS16": 85.31, - "STS17 (ar-ar)": 11.13, - "STS17 (en-ar)": -3.93, - "STS17 (en-de)": 79.04, - "STS17 (en-en)": 88.91, - "STS17 (en-tr)": 13.61, - "STS17 (es-en)": 71.72, - "STS17 (es-es)": 83.42, - "STS17 (fr-en)": 71.38, - "STS17 (it-en)": 69.5, - "STS17 (ko-ko)": 9.61, - "STS17 (nl-en)": 66.12, - "STS22 (ar)": 29.6, - "STS22 (de)": 47.72, - "STS22 (de-en)": 49.64, - "STS22 (de-fr)": 62.21, - "STS22 (de-pl)": 34.34, - "STS22 (en)": 64.32, - "STS22 (es)": 58.16, - "STS22 (es-en)": 69.15, - "STS22 (es-it)": 65.26, - "STS22 (fr)": 77.49, - "STS22 (fr-pl)": 50.71, - "STS22 (it)": 66.91, - "STS22 (pl)": 27.04, - "STS22 (pl-en)": 58.85, - "STS22 (ru)": 26.63, - "STS22 (tr)": 43.36, - "STS22 (zh)": 33.55, - "STS22 (zh-en)": 29.0, - "STSBenchmark": 83.93, - "STSBenchmarkMultilingualSTS (fr)": 79.42 + "Model": "bm25", + "BrightRetrieval (robotics)": 7.43, + "BrightRetrieval (pony)": 5.35, + "BrightRetrieval (biology)": 10.68, + "BrightRetrieval (stackoverflow)": 22.22, + "BrightRetrieval (earth_science)": 15.37, + "BrightRetrieval (psychology)": 8.42, + "BrightRetrieval (sustainable_living)": 10.68, + "BrightRetrieval (economics)": 10.68 } ] }, + "STS": { + "spearman": [] + }, "Summarization": { - "spearman": [ - { - "Model": "sentence-t5-xl", - "SummEval": 29.91, - "SummEvalFr": 31.59 - } - ] + "spearman": [] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "sentence-t5-xl" + "Model": "bm25", + "Core17InstructionRetrieval": -1.06, + "News21InstructionRetrieval": -2.15, + "Robust04InstructionRetrieval": -3.06 } ] } }, - "Cohere-embed-english-v3.0": { + "gte-Qwen1.5-7B-instruct": { "BitextMining": { - "f1": [ - { - "Model": "Cohere-embed-english-v3.0" - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "gte-Qwen1.5-7B-instruct", + "AmazonCounterfactualClassification (en)": 83.16, + "AmazonPolarityClassification": 96.7, + "AmazonReviewsClassification (en)": 62.17, + "AmazonReviewsClassification (zh)": 52.95, + "Banking77Classification": 81.68, + "EmotionClassification": 54.53, + "IFlyTek": 53.77, + "ImdbClassification": 95.58, + "JDReview": 88.2, + "MTOPDomainClassification (en)": 95.75, + "MTOPIntentClassification (en)": 84.26, + "MassiveIntentClassification (zh-CN)": 76.25, + "MassiveIntentClassification (en)": 78.47, + "MassiveScenarioClassification (en)": 78.19, + "MassiveScenarioClassification (zh-CN)": 77.26, + "MultilingualSentiment": 77.42, + "OnlineShopping": 94.48, + "TNews": 51.24, + "ToxicConversationsClassification": 78.75, + "TweetSentimentExtractionClassification": 66.0, + "Waimai": 88.63 } ] }, "Clustering": { "v_measure": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "gte-Qwen1.5-7B-instruct", + "ArxivClusteringP2P": 56.4, + "ArxivClusteringS2S": 51.45, + "BiorxivClusteringP2P": 49.01, + "BiorxivClusteringS2S": 45.06, + "CLSClusteringP2P": 47.21, + 
"CLSClusteringS2S": 45.79, + "MedrxivClusteringP2P": 44.37, + "MedrxivClusteringS2S": 42.0, + "RedditClustering": 73.37, + "RedditClusteringP2P": 72.51, + "StackExchangeClustering": 79.07, + "StackExchangeClusteringP2P": 49.57, + "ThuNewsClusteringP2P": 87.43, + "ThuNewsClusteringS2S": 87.9, + "TwentyNewsgroupsClustering": 51.31 } ] }, "PairClassification": { - "ap": [ + "ap": [], + "cos_sim_ap": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "gte-Qwen1.5-7B-instruct", + "Cmnli": 91.81, + "Ocnli": 85.22, + "SprintDuplicateQuestions": 95.99, + "TwitterSemEval2015": 79.36, + "TwitterURLCorpus": 86.79 } ] }, "Reranking": { "map": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "gte-Qwen1.5-7B-instruct", + "AskUbuntuDupQuestions": 66.0, + "CMedQAv1": 86.37, + "CMedQAv2": 87.41, + "MindSmallReranking": 32.71, + "SciDocsRR": 87.89, + "StackOverflowDupQuestions": 53.93, + "T2Reranking": 68.11 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "Cohere-embed-english-v3.0", - "AILACasedocs": 31.54, - "AILAStatutes": 27.15, - "ARCChallenge": 9.89, - "AlphaNLI": 15.1, - "BrightRetrieval (psychology)": 21.82, - "BrightRetrieval (economics)": 20.18, - "BrightRetrieval (robotics)": 16.21, - "BrightRetrieval (biology)": 18.98, - "BrightRetrieval (stackoverflow)": 16.47, - "BrightRetrieval (theoremqa_theorems)": 6.04, - "BrightRetrieval (pony)": 1.77, - "BrightRetrieval (sustainable_living)": 17.69, - "BrightRetrieval (aops)": 6.46, - "BrightRetrieval (theoremqa_questions)": 15.07, - "BrightRetrieval (leetcode)": 26.78, - "BrightRetrieval (earth_science)": 27.45, - "GerDaLIRSmall": 6.05, - "HellaSwag": 26.35, - "LeCaRDv2": 21.02, - "LegalBenchConsumerContractsQA": 77.12, - "LegalBenchCorporateLobbying": 93.68, - "LegalQuAD": 26.08, - "LegalSummarization": 61.7, - "PIQA": 28.49, - "Quail": 4.1, - "RARbCode": 57.19, - "RARbMath": 72.26, - "SIQA": 4.26, - "SpartQA": 3.75, - "TempReasonL1": 1.5, - "TempReasonL2Fact": 35.91, - "TempReasonL2Pure": 1.89, - "TempReasonL3Fact": 27.51, - "TempReasonL3Pure": 8.53, - "WinoGrande": 58.01 + "Model": "gte-Qwen1.5-7B-instruct", + "ArguAna": 62.65, + "BrightRetrieval (stackoverflow)": 19.85, + "BrightRetrieval (earth_science)": 36.22, + "BrightRetrieval (leetcode)": 25.46, + "BrightRetrieval (theoremqa_questions)": 26.97, + "BrightRetrieval (economics)": 17.72, + "BrightRetrieval (robotics)": 13.47, + "BrightRetrieval (pony)": 9.79, + "BrightRetrieval (aops)": 14.36, + "BrightRetrieval (psychology)": 24.61, + "BrightRetrieval (theoremqa_theorems)": 30.8, + "BrightRetrieval (biology)": 30.92, + "BrightRetrieval (sustainable_living)": 14.93, + "CQADupstackRetrieval": 40.64, + "ClimateFEVER": 44.0, + "CmedqaRetrieval": 43.47, + "CovidRetrieval": 80.87, + "DBPedia": 48.04, + "DuRetrieval": 86.01, + "EcomRetrieval": 66.46, + "FEVER": 93.35, + "FiQA2018": 55.31, + "HotpotQA": 72.25, + "MMarcoRetrieval": 73.83, + "MSMARCO": 41.68, + "MedicalRetrieval": 61.33, + "NFCorpus": 38.25, + "NQ": 61.79, + "QuoraRetrieval": 89.61, + "SCIDOCS": 27.69, + "SciFact": 75.31, + "T2Retrieval": 83.58, + "TRECCOVID": 72.72, + "Touche2020": 20.3, + "VideoRetrieval": 69.41 + } + ], + "recall_at_1": [ + { + "Model": "gte-Qwen1.5-7B-instruct", + "BrightRetrieval (economics)": 25.73, + "BrightRetrieval (pony)": 1.32, + "BrightRetrieval (robotics)": 21.29, + "BrightRetrieval (biology)": 39.24, + "BrightRetrieval (earth_science)": 36.13, + "BrightRetrieval (stackoverflow)": 23.5, + "BrightRetrieval (psychology)": 42.28, + "BrightRetrieval (sustainable_living)": 33.1 } ] }, "STS": { - 
"spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "gte-Qwen1.5-7B-instruct", + "AFQMC": 58.47, + "ATEC": 55.46, + "BIOSSES": 81.12, + "BQ": 77.59, + "LCQMC": 76.29, + "PAWSX": 50.22, + "QBQTC": 31.82, + "SICK-R": 79.15, + "STS12": 76.52, + "STS13": 88.63, + "STS14": 83.32, + "STS15": 87.5, + "STS16": 86.39, + "STS17 (en-en)": 87.79, + "STS22 (en)": 66.4, + "STS22 (zh)": 67.36, + "STSB": 81.37, + "STSBenchmark": 87.35 } ] }, "Summarization": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "gte-Qwen1.5-7B-instruct", + "SummEval": 31.46 } ] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "Cohere-embed-english-v3.0", - "Core17InstructionRetrieval": 2.8, - "News21InstructionRetrieval": 0.2, - "Robust04InstructionRetrieval": -3.63 - } - ] + "p-MRR": [] } }, - "sentence-camembert-base": { + "voyage-large-2-instruct": { "BitextMining": { - "f1": [ - { - "Model": "sentence-camembert-base" - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "sentence-camembert-base", - "AmazonReviewsClassification (fr)": 36.03, - "MTOPDomainClassification (fr)": 77.1, - "MTOPIntentClassification (fr)": 43.44, - "MasakhaNEWSClassification (fra)": 70.36, - "MassiveIntentClassification (fr)": 51.59, - "MassiveScenarioClassification (fr)": 61.28 + "Model": "voyage-large-2-instruct", + "AmazonCounterfactualClassification (en)": 77.6, + "AmazonPolarityClassification": 96.58, + "AmazonReviewsClassification (en)": 50.77, + "Banking77Classification": 86.96, + "EmotionClassification": 59.81, + "ImdbClassification": 96.13, + "MTOPDomainClassification (en)": 98.86, + "MTOPIntentClassification (en)": 86.97, + "MassiveIntentClassification (en)": 81.08, + "MassiveScenarioClassification (en)": 87.95, + "ToxicConversationsClassification": 83.58, + "TweetSentimentExtractionClassification": 71.55 } ] }, "Clustering": { "v_measure": [ { - "Model": "sentence-camembert-base", - "AlloProfClusteringP2P": 59.09, - "AlloProfClusteringS2S": 38.92, - "HALClusteringS2S": 20.22, - "MLSUMClusteringP2P": 35.98, - "MLSUMClusteringS2S": 27.05, - "MasakhaNEWSClusteringP2P (fra)": 36.03, - "MasakhaNEWSClusteringS2S (fra)": 30.77 + "Model": "voyage-large-2-instruct", + "ArxivClusteringP2P": 51.81, + "ArxivClusteringS2S": 44.73, + "BiorxivClusteringP2P": 46.07, + "BiorxivClusteringS2S": 40.64, + "MedrxivClusteringP2P": 42.94, + "MedrxivClusteringS2S": 41.44, + "RedditClustering": 68.5, + "RedditClusteringP2P": 64.86, + "StackExchangeClustering": 74.16, + "StackExchangeClusteringP2P": 45.1, + "TwentyNewsgroupsClustering": 66.62 } ] }, "PairClassification": { - "ap": [ + "ap": [], + "cos_sim_ap": [ { - "Model": "sentence-camembert-base", - "OpusparcusPC (fr)": 92.05, - "PawsXPairClassification (fr)": 57.44 + "Model": "voyage-large-2-instruct", + "SprintDuplicateQuestions": 94.5, + "TwitterSemEval2015": 86.32, + "TwitterURLCorpus": 86.9 } ] }, "Reranking": { "map": [ { - "Model": "sentence-camembert-base", - "AlloprofReranking": 48.68, - "SyntecReranking": 79.75 + "Model": "voyage-large-2-instruct", + "AskUbuntuDupQuestions": 64.92, + "MindSmallReranking": 30.97, + "SciDocsRR": 89.34, + "StackOverflowDupQuestions": 55.11 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "sentence-camembert-base", - "AlloprofRetrieval": 21.94, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 13.36, - "SyntecRetrieval": 68.62, - "XPQARetrieval (fr)": 57.92 + "Model": "voyage-large-2-instruct", + "ArguAna": 64.06, + 
"BrightRetrieval (theoremqa_questions)": 26.06, + "BrightRetrieval (earth_science)": 25.09, + "BrightRetrieval (leetcode)": 30.6, + "BrightRetrieval (economics)": 19.85, + "BrightRetrieval (robotics)": 11.21, + "BrightRetrieval (psychology)": 24.79, + "BrightRetrieval (aops)": 7.45, + "BrightRetrieval (sustainable_living)": 15.58, + "BrightRetrieval (pony)": 1.48, + "BrightRetrieval (theoremqa_theorems)": 11.1, + "BrightRetrieval (biology)": 23.55, + "BrightRetrieval (stackoverflow)": 15.03, + "CQADupstackRetrieval": 46.6, + "ClimateFEVER": 32.65, + "DBPedia": 46.03, + "FEVER": 91.47, + "FiQA2018": 59.76, + "HotpotQA": 70.86, + "MSMARCO": 40.6, + "NFCorpus": 40.32, + "NQ": 65.92, + "QuoraRetrieval": 87.4, + "SCIDOCS": 24.32, + "SciFact": 79.99, + "TRECCOVID": 85.07, + "Touche2020": 39.16 + } + ], + "recall_at_1": [ + { + "Model": "voyage-large-2-instruct", + "BrightRetrieval (psychology)": 41.58, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (biology)": 34.38, + "BrightRetrieval (stackoverflow)": 13.68, + "BrightRetrieval (pony)": 1.28, + "BrightRetrieval (economics)": 26.7, + "BrightRetrieval (sustainable_living)": 31.1, + "BrightRetrieval (earth_science)": 35.35 } ] }, "STS": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "sentence-camembert-base", - "SICKFr": 74.18, - "STS22 (fr)": 77.54, - "STSBenchmarkMultilingualSTS (fr)": 81.64 + "Model": "voyage-large-2-instruct", + "BIOSSES": 89.24, + "SICK-R": 83.16, + "STS12": 73.34, + "STS13": 88.49, + "STS14": 86.49, + "STS15": 91.13, + "STS16": 85.68, + "STS17 (en-en)": 90.06, + "STS22 (en)": 66.32, + "STSBenchmark": 89.22 } ] }, "Summarization": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "sentence-camembert-base", - "SummEvalFr": 28.77 + "Model": "voyage-large-2-instruct", + "SummEval": 30.84 } ] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "sentence-camembert-base" - } - ] + "p-MRR": [] } }, - "Cohere-embed-multilingual-v3.0": { + "text-embedding-3-large": { "BitextMining": { - "f1": [ - { - "Model": "Cohere-embed-multilingual-v3.0" - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AmazonReviewsClassification (fr)": 41.89, - "MTOPDomainClassification (fr)": 86.23, - "MTOPIntentClassification (fr)": 61.07, - "MasakhaNEWSClassification (fra)": 83.06, - "MassiveIntentClassification (fr)": 62.94, - "MassiveScenarioClassification (fr)": 67.29 + "Model": "text-embedding-3-large", + "AmazonCounterfactualClassification (en)": 78.93, + "AmazonPolarityClassification": 92.85, + "AmazonReviewsClassification (en)": 48.7, + "Banking77Classification": 85.69, + "EmotionClassification": 51.58, + "ImdbClassification": 87.67, + "MTOPDomainClassification (en)": 95.36, + "MTOPIntentClassification (en)": 75.07, + "MassiveIntentClassification (en)": 74.64, + "MassiveScenarioClassification (en)": 79.79, + "ToxicConversationsClassification": 72.92, + "TweetSentimentExtractionClassification": 62.22 } ] }, "Clustering": { "v_measure": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AlloProfClusteringP2P": 63.53, - "AlloProfClusteringS2S": 36.18, - "HALClusteringS2S": 19.9, - "MLSUMClusteringP2P": 45.08, - "MLSUMClusteringS2S": 34.75, - "MasakhaNEWSClusteringP2P (fra)": 53.18, - "MasakhaNEWSClusteringS2S (fra)": 32.31 + "Model": "text-embedding-3-large", + "ArxivClusteringP2P": 49.01, + "ArxivClusteringS2S": 44.45, + "BiorxivClusteringP2P": 38.03, + "BiorxivClusteringS2S": 36.53, + "MedrxivClusteringP2P": 32.7, + 
"MedrxivClusteringS2S": 31.27, + "RedditClustering": 67.84, + "RedditClusteringP2P": 67.96, + "StackExchangeClustering": 76.26, + "StackExchangeClusteringP2P": 36.88, + "TwentyNewsgroupsClustering": 58.14 } ] }, "PairClassification": { - "ap": [ + "ap": [], + "cos_sim_ap": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "OpusparcusPC (fr)": 94.08, - "PawsXPairClassification (fr)": 61.26 + "Model": "text-embedding-3-large", + "SprintDuplicateQuestions": 92.25, + "TwitterSemEval2015": 77.13, + "TwitterURLCorpus": 87.78 } ] }, "Reranking": { "map": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AlloprofReranking": 51.01, - "SyntecReranking": 85.72 + "Model": "text-embedding-3-large", + "AskUbuntuDupQuestions": 65.03, + "MindSmallReranking": 29.86, + "SciDocsRR": 86.66, + "StackOverflowDupQuestions": 55.08 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-large", + "AILACasedocs": 39.0, + "AILAStatutes": 41.31, + "ARCChallenge": 23.98, + "AlphaNLI": 37.27, + "ArguAna": 58.05, + "BrightRetrieval (theoremqa_questions)": 22.22, + "BrightRetrieval (leetcode)": 23.65, + "BrightRetrieval (earth_science)": 26.27, + "BrightRetrieval (psychology)": 27.52, + "BrightRetrieval (robotics)": 12.93, + "BrightRetrieval (economics)": 19.98, + "BrightRetrieval (stackoverflow)": 12.49, + "BrightRetrieval (biology)": 23.67, + "BrightRetrieval (theoremqa_theorems)": 10.82, + "BrightRetrieval (pony)": 2.45, + "BrightRetrieval (sustainable_living)": 20.32, + "BrightRetrieval (aops)": 8.45, + "CQADupstackRetrieval": 47.54, + "ClimateFEVER": 30.27, + "DBPedia": 44.76, + "FEVER": 87.94, + "FiQA2018": 55.0, + "GerDaLIRSmall": 32.77, + "HellaSwag": 34.12, + "HotpotQA": 71.58, + "LEMBNarrativeQARetrieval": 44.09, + "LEMBQMSumRetrieval": 32.49, + "LEMBSummScreenFDRetrieval": 84.8, + "LEMBWikimQARetrieval": 54.16, + "LeCaRDv2": 57.2, + "LegalBenchConsumerContractsQA": 79.39, + "LegalBenchCorporateLobbying": 95.09, + "LegalQuAD": 57.47, + "LegalSummarization": 71.55, + "MSMARCO": 40.24, + "NFCorpus": 42.07, + "NQ": 61.27, + "PIQA": 41.96, + "Quail": 10.15, + "QuoraRetrieval": 89.05, + "RARbCode": 89.64, + "RARbMath": 90.08, + "SCIDOCS": 23.11, + "SIQA": 3.44, + "SciFact": 77.77, + "SpartQA": 7.51, + "TRECCOVID": 79.56, + "TempReasonL1": 2.13, + "TempReasonL2Fact": 28.65, + "TempReasonL2Pure": 10.34, + "TempReasonL3Fact": 25.52, + "TempReasonL3Pure": 15.28, + "Touche2020": 23.35, + "WinoGrande": 29.11 + }, + { + "Model": "text-embedding-3-large", + "LEMBNeedleRetrieval": 29.25, + "LEMBPasskeyRetrieval": 63.0 } - ] - }, - "Retrieval": { - "ndcg_at_10": [ + ], + "recall_at_1": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "AlloprofRetrieval": 38.36, - "BSARDRetrieval": 0.14, - "MintakaRetrieval (fr)": 25.44, - "SyntecRetrieval": 79.27, - "XPQARetrieval (fr)": 58.87 + "Model": "text-embedding-3-large", + "BrightRetrieval (earth_science)": 32.26, + "BrightRetrieval (sustainable_living)": 26.34, + "BrightRetrieval (economics)": 24.76, + "BrightRetrieval (stackoverflow)": 11.54, + "BrightRetrieval (pony)": 0.0, + "BrightRetrieval (biology)": 33.09, + "BrightRetrieval (robotics)": 11.88, + "BrightRetrieval (psychology)": 35.15 } ] }, "STS": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "SICKFr": 79.23, - "STS22 (fr)": 82.76, - "STSBenchmarkMultilingualSTS (fr)": 81.84 + "Model": "text-embedding-3-large", + "BIOSSES": 84.68, + "SICK-R": 79.0, + "STS12": 72.84, + "STS13": 86.1, + "STS14": 81.15, + "STS15": 88.49, + "STS16": 
85.08, + "STS17 (en-en)": 90.22, + "STS22 (en)": 66.14, + "STSBenchmark": 83.56 } ] }, "Summarization": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "Cohere-embed-multilingual-v3.0", - "SummEvalFr": 31.26 + "Model": "text-embedding-3-large", + "SummEval": 29.92 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "Cohere-embed-multilingual-v3.0" + "Model": "text-embedding-3-large", + "Core17InstructionRetrieval": -0.2, + "News21InstructionRetrieval": -2.03, + "Robust04InstructionRetrieval": -5.81 } ] } }, - "text2vec-large-chinese": { + "e5-mistral-7b-instruct": { "BitextMining": { - "f1": [ - { - "Model": "text2vec-large-chinese" - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "text2vec-large-chinese", - "AmazonReviewsClassification (zh)": 33.77, - "IFlyTek": 41.54, - "JDReview": 81.56, - "MassiveIntentClassification (zh-CN)": 63.23, - "MassiveScenarioClassification (zh-CN)": 68.45, - "MultilingualSentiment": 58.97, - "OnlineShopping": 83.51, - "TNews": 38.92, - "Waimai": 76.01 + "Model": "e5-mistral-7b-instruct", + "AmazonReviewsClassification (fr)": 36.71, + "MTOPDomainClassification (fr)": 74.8, + "MTOPIntentClassification (fr)": 53.97, + "MasakhaNEWSClassification (fra)": 80.59, + "MassiveIntentClassification (fr)": 46.39, + "MassiveScenarioClassification (fr)": 53.86 } ] }, "Clustering": { "v_measure": [ { - "Model": "text2vec-large-chinese", - "CLSClusteringP2P": 30.13, - "CLSClusteringS2S": 28.77, - "ThuNewsClusteringP2P": 35.05, - "ThuNewsClusteringS2S": 26.14 + "Model": "e5-mistral-7b-instruct", + "AlloProfClusteringP2P": 61.06, + "AlloProfClusteringS2S": 28.12, + "HALClusteringS2S": 19.69, + "MLSUMClusteringP2P": 45.59, + "MLSUMClusteringS2S": 32.0, + "MasakhaNEWSClusteringP2P (fra)": 52.47, + "MasakhaNEWSClusteringS2S (fra)": 49.2 } ] }, "PairClassification": { - "ap": [ + "ap": [], + "cos_sim_ap": [ { - "Model": "text2vec-large-chinese", - "Cmnli": 77.67, - "Ocnli": 64.04 + "Model": "e5-mistral-7b-instruct", + "OpusparcusPC (fr)": 88.5, + "PawsXPairClassification (fr)": 63.65 } ] }, "Reranking": { "map": [ { - "Model": "text2vec-large-chinese", - "CMedQAv1": 58.92, - "CMedQAv2": 60.41, - "MMarcoReranking": 12.48, - "T2Reranking": 64.82 + "Model": "e5-mistral-7b-instruct", + "AlloprofReranking": 47.36, + "SyntecReranking": 77.05 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text2vec-large-chinese", - "CmedqaRetrieval": 15.53, - "CovidRetrieval": 60.48, - "DuRetrieval": 51.87, - "EcomRetrieval": 37.58, - "MMarcoRetrieval": 45.96, - "MedicalRetrieval": 30.93, - "T2Retrieval": 50.52, - "VideoRetrieval": 42.65 + "Model": "e5-mistral-7b-instruct", + "AILACasedocs": 38.76, + "AILAStatutes": 38.07, + "AlloprofRetrieval": 16.46, + "BSARDRetrieval": 0.0, + "BrightRetrieval (sustainable_living)": 18.51, + "BrightRetrieval (economics)": 15.49, + "BrightRetrieval (theoremqa_theorems)": 25.09, + "BrightRetrieval (aops)": 7.1, + "BrightRetrieval (theoremqa_questions)": 23.94, + "BrightRetrieval (stackoverflow)": 9.83, + "BrightRetrieval (psychology)": 15.79, + "BrightRetrieval (pony)": 4.81, + "BrightRetrieval (leetcode)": 28.72, + "BrightRetrieval (biology)": 18.84, + "BrightRetrieval (earth_science)": 25.96, + "BrightRetrieval (robotics)": 16.37, + "GerDaLIRSmall": 37.18, + "LEMBNarrativeQARetrieval": 44.62, + "LEMBQMSumRetrieval": 43.63, + "LEMBSummScreenFDRetrieval": 96.82, + "LEMBWikimQARetrieval": 82.11, + "LeCaRDv2": 68.56, + "LegalBenchConsumerContractsQA": 75.46, + "LegalBenchCorporateLobbying": 94.01, + "LegalQuAD": 59.64, 
+ "LegalSummarization": 66.51, + "MintakaRetrieval (fr)": 3.57, + "SyntecRetrieval": 55.9, + "XPQARetrieval (fr)": 41.29 + }, + { + "Model": "e5-mistral-7b-instruct", + "LEMBNeedleRetrieval": 48.25, + "LEMBPasskeyRetrieval": 71.0 + } + ], + "recall_at_1": [ + { + "Model": "e5-mistral-7b-instruct", + "BrightRetrieval (pony)": 1.14, + "BrightRetrieval (robotics)": 17.33, + "BrightRetrieval (economics)": 26.21, + "BrightRetrieval (biology)": 29.93, + "BrightRetrieval (earth_science)": 36.28, + "BrightRetrieval (psychology)": 46.73, + "BrightRetrieval (sustainable_living)": 32.21, + "BrightRetrieval (stackoverflow)": 14.53 } ] }, "STS": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "text2vec-large-chinese", - "AFQMC": 24.51, - "ATEC": 32.45, - "BQ": 44.22, - "LCQMC": 69.16, - "PAWSX": 14.55, - "QBQTC": 29.51, - "STS22 (zh)": 65.94, - "STSB": 79.45 + "Model": "e5-mistral-7b-instruct", + "SICKFr": 64.39, + "STS22 (fr)": 69.82, + "STSBenchmarkMultilingualSTS (fr)": 61.87 } ] }, "Summarization": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "text2vec-large-chinese" + "Model": "e5-mistral-7b-instruct", + "SummEvalFr": 32.22 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "text2vec-large-chinese" + "Model": "e5-mistral-7b-instruct", + "Core17InstructionRetrieval": 0.09, + "News21InstructionRetrieval": -0.86, + "Robust04InstructionRetrieval": -9.59 } ] } }, - "dragon-plus": { + "gte-Qwen2-7B-instruct": { "BitextMining": { - "f1": [ - { - "Model": "dragon-plus" - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "dragon-plus" - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "dragon-plus" - } - ] + "v_measure": [] }, "PairClassification": { - "ap": [ - { - "Model": "dragon-plus" - } - ] + "ap": [] }, "Reranking": { - "map": [ - { - "Model": "dragon-plus" - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "dragon-plus", - "ARCChallenge": 8.91, - "AlphaNLI": 32.1, - "HellaSwag": 27.69, - "PIQA": 28.01, - "Quail": 4.09, - "RARbCode": 17.58, - "RARbMath": 45.09, - "SIQA": 2.0, - "SpartQA": 10.34, - "TempReasonL1": 1.82, - "TempReasonL2Fact": 17.45, - "TempReasonL2Pure": 0.55, - "TempReasonL3Fact": 15.71, - "TempReasonL3Pure": 7.97, - "WinoGrande": 67.18 + "Model": "gte-Qwen2-7B-instruct", + "BrightRetrieval (earth_science)": 40.66, + "BrightRetrieval (sustainable_living)": 20.82, + "BrightRetrieval (theoremqa_theorems)": 34.22, + "BrightRetrieval (aops)": 15.1, + "BrightRetrieval (economics)": 16.18, + "BrightRetrieval (pony)": 1.25, + "BrightRetrieval (stackoverflow)": 13.95, + "BrightRetrieval (leetcode)": 31.07, + "BrightRetrieval (biology)": 32.09, + "BrightRetrieval (theoremqa_questions)": 29.9, + "BrightRetrieval (robotics)": 12.82, + "BrightRetrieval (psychology)": 26.58 } - ] - }, - "STS": { - "spearman": [ + ], + "recall_at_1": [ { - "Model": "dragon-plus" + "Model": "gte-Qwen2-7B-instruct", + "BrightRetrieval (psychology)": 46.73, + "BrightRetrieval (biology)": 34.87, + "BrightRetrieval (sustainable_living)": 31.28, + "BrightRetrieval (robotics)": 10.89, + "BrightRetrieval (pony)": 1.17, + "BrightRetrieval (earth_science)": 38.36, + "BrightRetrieval (stackoverflow)": 16.67, + "BrightRetrieval (economics)": 27.67 } ] }, + "STS": { + "spearman": [] + }, "Summarization": { - "spearman": [ - { - "Model": "dragon-plus" - } - ] + "spearman": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "dragon-plus" - } - ] + "p-MRR": [] } }, - "bge-small-zh-v1.5": { + 
"all-mpnet-base-v2": { "BitextMining": { "f1": [ { - "Model": "bge-small-zh-v1.5" + "Model": "all-mpnet-base-v2", + "BornholmBitextMining (dan-Latn)": 27.44, + "Tatoeba (pol-Latn_eng-Latn)": 4.09, + "Tatoeba (ita-Latn_eng-Latn)": 11.1, + "Tatoeba (cat-Latn_eng-Latn)": 9.44, + "Tatoeba (aze-Latn_eng-Latn)": 1.49, + "Tatoeba (eus-Latn_eng-Latn)": 3.94, + "Tatoeba (epo-Latn_eng-Latn)": 7.15, + "Tatoeba (lit-Latn_eng-Latn)": 1.02, + "Tatoeba (ast-Latn_eng-Latn)": 9.78, + "Tatoeba (bul-Cyrl_eng-Latn)": 0.35, + "Tatoeba (ceb-Latn_eng-Latn)": 4.41, + "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, + "Tatoeba (tzl-Latn_eng-Latn)": 3.55, + "Tatoeba (zsm-Latn_eng-Latn)": 4.75, + "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17, + "Tatoeba (pam-Latn_eng-Latn)": 4.32, + "Tatoeba (amh-Ethi_eng-Latn)": 0.0, + "Tatoeba (slv-Latn_eng-Latn)": 3.73, + "Tatoeba (lvs-Latn_eng-Latn)": 2.98, + "Tatoeba (sqi-Latn_eng-Latn)": 3.45, + "Tatoeba (orv-Cyrl_eng-Latn)": 0.0, + "Tatoeba (vie-Latn_eng-Latn)": 4.96, + "Tatoeba (pes-Arab_eng-Latn)": 0.2, + "Tatoeba (por-Latn_eng-Latn)": 10.48, + "Tatoeba (dtp-Latn_eng-Latn)": 3.54, + "Tatoeba (yid-Hebr_eng-Latn)": 0.08, + "Tatoeba (isl-Latn_eng-Latn)": 3.86, + "Tatoeba (cha-Latn_eng-Latn)": 12.2, + "Tatoeba (ron-Latn_eng-Latn)": 7.34, + "Tatoeba (hye-Armn_eng-Latn)": 0.14, + "Tatoeba (mar-Deva_eng-Latn)": 0.11, + "Tatoeba (hin-Deva_eng-Latn)": 0.02, + "Tatoeba (kor-Hang_eng-Latn)": 0.32, + "Tatoeba (srp-Cyrl_eng-Latn)": 1.89, + "Tatoeba (csb-Latn_eng-Latn)": 4.19, + "Tatoeba (jpn-Jpan_eng-Latn)": 1.71, + "Tatoeba (ber-Tfng_eng-Latn)": 4.56, + "Tatoeba (wuu-Hans_eng-Latn)": 0.91, + "Tatoeba (jav-Latn_eng-Latn)": 3.17, + "Tatoeba (nob-Latn_eng-Latn)": 4.37, + "Tatoeba (bre-Latn_eng-Latn)": 3.65, + "Tatoeba (kzj-Latn_eng-Latn)": 3.62, + "Tatoeba (urd-Arab_eng-Latn)": 0.0, + "Tatoeba (ces-Latn_eng-Latn)": 3.56, + "Tatoeba (cbk-Latn_eng-Latn)": 9.33, + "Tatoeba (gla-Latn_eng-Latn)": 2.04, + "Tatoeba (war-Latn_eng-Latn)": 5.14, + "Tatoeba (swh-Latn_eng-Latn)": 6.01, + "Tatoeba (swg-Latn_eng-Latn)": 7.86, + "Tatoeba (glg-Latn_eng-Latn)": 12.0, + "Tatoeba (fao-Latn_eng-Latn)": 7.08, + "Tatoeba (gsw-Latn_eng-Latn)": 10.67, + "Tatoeba (rus-Cyrl_eng-Latn)": 0.14, + "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52, + "Tatoeba (gle-Latn_eng-Latn)": 2.19, + "Tatoeba (slk-Latn_eng-Latn)": 3.4, + "Tatoeba (nno-Latn_eng-Latn)": 5.75, + "Tatoeba (cor-Latn_eng-Latn)": 2.42, + "Tatoeba (nov-Latn_eng-Latn)": 16.61, + "Tatoeba (swe-Latn_eng-Latn)": 6.55, + "Tatoeba (max-Deva_eng-Latn)": 6.46, + "Tatoeba (oci-Latn_eng-Latn)": 8.57, + "Tatoeba (lfn-Latn_eng-Latn)": 6.1, + "Tatoeba (fra-Latn_eng-Latn)": 16.9, + "Tatoeba (ben-Beng_eng-Latn)": 0.0, + "Tatoeba (bel-Cyrl_eng-Latn)": 0.65, + "Tatoeba (lat-Latn_eng-Latn)": 5.78, + "Tatoeba (cmn-Hans_eng-Latn)": 2.22, + "Tatoeba (kat-Geor_eng-Latn)": 0.43, + "Tatoeba (bos-Latn_eng-Latn)": 4.6, + "Tatoeba (xho-Latn_eng-Latn)": 3.3, + "Tatoeba (tha-Thai_eng-Latn)": 0.0, + "Tatoeba (cym-Latn_eng-Latn)": 4.88, + "Tatoeba (deu-Latn_eng-Latn)": 11.46, + "Tatoeba (awa-Deva_eng-Latn)": 0.44, + "Tatoeba (ido-Latn_eng-Latn)": 9.84, + "Tatoeba (tat-Cyrl_eng-Latn)": 0.24, + "Tatoeba (kab-Latn_eng-Latn)": 1.31, + "Tatoeba (uzb-Latn_eng-Latn)": 1.98, + "Tatoeba (heb-Hebr_eng-Latn)": 0.28, + "Tatoeba (ara-Arab_eng-Latn)": 0.1, + "Tatoeba (fry-Latn_eng-Latn)": 12.43, + "Tatoeba (afr-Latn_eng-Latn)": 6.08, + "Tatoeba (kur-Latn_eng-Latn)": 3.65, + "Tatoeba (pms-Latn_eng-Latn)": 7.63, + "Tatoeba (ell-Grek_eng-Latn)": 0.0, + "Tatoeba (spa-Latn_eng-Latn)": 10.12, + "Tatoeba (dsb-Latn_eng-Latn)": 2.96, + "Tatoeba 
(uig-Arab_eng-Latn)": 0.33, + "Tatoeba (nld-Latn_eng-Latn)": 9.29, + "Tatoeba (tel-Telu_eng-Latn)": 0.73, + "Tatoeba (hrv-Latn_eng-Latn)": 3.77, + "Tatoeba (nds-Latn_eng-Latn)": 10.96, + "Tatoeba (hun-Latn_eng-Latn)": 3.23, + "Tatoeba (est-Latn_eng-Latn)": 2.35, + "Tatoeba (mal-Mlym_eng-Latn)": 0.15, + "Tatoeba (khm-Khmr_eng-Latn)": 0.28, + "Tatoeba (hsb-Latn_eng-Latn)": 3.12, + "Tatoeba (tgl-Latn_eng-Latn)": 4.06, + "Tatoeba (ang-Latn_eng-Latn)": 9.77, + "Tatoeba (tur-Latn_eng-Latn)": 3.16, + "Tatoeba (tuk-Latn_eng-Latn)": 2.23, + "Tatoeba (ile-Latn_eng-Latn)": 17.84, + "Tatoeba (mon-Cyrl_eng-Latn)": 0.81, + "Tatoeba (yue-Hant_eng-Latn)": 1.16, + "Tatoeba (ina-Latn_eng-Latn)": 22.55, + "Tatoeba (tam-Taml_eng-Latn)": 0.73, + "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5, + "Tatoeba (dan-Latn_eng-Latn)": 10.01, + "Tatoeba (arq-Arab_eng-Latn)": 0.33, + "Tatoeba (arz-Arab_eng-Latn)": 0.0, + "Tatoeba (fin-Latn_eng-Latn)": 3.82, + "Tatoeba (ind-Latn_eng-Latn)": 4.88 } ] }, "Classification": { "accuracy": [ { - "Model": "bge-small-zh-v1.5", - "AmazonReviewsClassification (zh)": 35.91, - "IFlyTek": 45.49, - "JDReview": 80.04, - "MassiveIntentClassification (zh-CN)": 63.95, - "MassiveScenarioClassification (zh-CN)": 70.8, - "MultilingualSentiment": 63.06, - "OnlineShopping": 85.05, - "TNews": 48.15, - "Waimai": 83.18 + "Model": "all-mpnet-base-v2", + "AllegroReviews (pol-Latn)": 22.99, + "AmazonCounterfactualClassification (en-ext)": 67.5, + "AmazonCounterfactualClassification (en)": 65.03, + "AmazonCounterfactualClassification (deu-Latn)": 55.66, + "AmazonCounterfactualClassification (jpn-Jpan)": 60.69, + "AmazonPolarityClassification": 67.14, + "AmazonReviewsClassification (en)": 31.44, + "AmazonReviewsClassification (deu-Latn)": 26.05, + "AmazonReviewsClassification (spa-Latn)": 27.73, + "AmazonReviewsClassification (fra-Latn)": 28.49, + "AmazonReviewsClassification (jpn-Jpan)": 23.65, + "AmazonReviewsClassification (cmn-Hans)": 23.62, + "AngryTweetsClassification (dan-Latn)": 44.13, + "Banking77Classification": 81.7, + "CBD (pol-Latn)": 50.25, + "DanishPoliticalCommentsClassification (dan-Latn)": 28.31, + "EmotionClassification": 42.22, + "GeoreviewClassification (rus-Cyrl)": 25.93, + "HeadlineClassification (rus-Cyrl)": 28.53, + "IFlyTek (cmn-Hans)": 17.18, + "ImdbClassification": 71.17, + "InappropriatenessClassification (rus-Cyrl)": 51.82, + "JDReview (cmn-Hans)": 60.19, + "KinopoiskClassification (rus-Cyrl)": 34.18, + "LccSentimentClassification (dan-Latn)": 39.27, + "MTOPDomainClassification (en)": 91.89, + "MTOPDomainClassification (deu-Latn)": 71.86, + "MTOPDomainClassification (spa-Latn)": 71.3, + "MTOPDomainClassification (fra-Latn)": 74.88, + "MTOPDomainClassification (hin-Deva)": 39.93, + "MTOPDomainClassification (tha-Thai)": 17.54, + "MTOPIntentClassification (en)": 68.27, + "MTOPIntentClassification (deu-Latn)": 44.36, + "MTOPIntentClassification (spa-Latn)": 39.48, + "MTOPIntentClassification (fra-Latn)": 37.57, + "MTOPIntentClassification (hin-Deva)": 18.63, + "MTOPIntentClassification (tha-Thai)": 5.39, + "MasakhaNEWSClassification (amh-Ethi)": 36.49, + "MasakhaNEWSClassification (eng)": 79.75, + "MasakhaNEWSClassification (fra-Latn)": 77.77, + "MasakhaNEWSClassification (hau-Latn)": 59.22, + "MasakhaNEWSClassification (ibo-Latn)": 61.64, + "MasakhaNEWSClassification (lin-Latn)": 74.0, + "MasakhaNEWSClassification (lug-Latn)": 58.43, + "MasakhaNEWSClassification (orm-Ethi)": 48.15, + "MasakhaNEWSClassification (pcm-Latn)": 92.2, + "MasakhaNEWSClassification (run-Latn)": 64.72, + 
"MasakhaNEWSClassification (sna-Latn)": 73.69, + "MasakhaNEWSClassification (som-Latn)": 49.97, + "MasakhaNEWSClassification (swa-Latn)": 55.15, + "MasakhaNEWSClassification (tir-Ethi)": 27.46, + "MasakhaNEWSClassification (xho-Latn)": 60.98, + "MasakhaNEWSClassification (yor-Latn)": 63.33, + "MassiveIntentClassification (en)": 69.76, + "MassiveIntentClassification (ara-Arab)": 20.42, + "MassiveIntentClassification (isl-Latn)": 31.46, + "MassiveIntentClassification (rus-Cyrl)": 23.98, + "MassiveIntentClassification (hun-Latn)": 34.38, + "MassiveIntentClassification (pol-Latn)": 34.26, + "MassiveIntentClassification (lav-Latn)": 35.08, + "MassiveIntentClassification (msa-Latn)": 30.53, + "MassiveIntentClassification (ind-Latn)": 36.31, + "MassiveIntentClassification (kan-Knda)": 3.76, + "MassiveIntentClassification (tam-Taml)": 9.25, + "MassiveIntentClassification (ron-Latn)": 38.07, + "MassiveIntentClassification (por-Latn)": 42.83, + "MassiveIntentClassification (jpn-Jpan)": 33.13, + "MassiveIntentClassification (tgl-Latn)": 36.33, + "MassiveIntentClassification (amh-Ethi)": 2.4, + "MassiveIntentClassification (fin-Latn)": 34.58, + "MassiveIntentClassification (hye-Armn)": 10.11, + "MassiveIntentClassification (nld-Latn)": 38.49, + "MassiveIntentClassification (tur-Latn)": 32.02, + "MassiveIntentClassification (urd-Arab)": 12.86, + "MassiveIntentClassification (cym-Latn)": 30.82, + "MassiveIntentClassification (fra-Latn)": 44.27, + "MassiveIntentClassification (aze-Latn)": 28.92, + "MassiveIntentClassification (ben-Beng)": 12.35, + "MassiveIntentClassification (mon-Cyrl)": 19.65, + "MassiveIntentClassification (ita-Latn)": 40.29, + "MassiveIntentClassification (tel-Telu)": 2.26, + "MassiveIntentClassification (kat-Geor)": 7.66, + "MassiveIntentClassification (hin-Deva)": 17.68, + "MassiveIntentClassification (fas-Arab)": 22.45, + "MassiveIntentClassification (swe-Latn)": 39.02, + "MassiveIntentClassification (heb-Hebr)": 23.6, + "MassiveIntentClassification (sqi-Latn)": 37.26, + "MassiveIntentClassification (mal-Mlym)": 2.62, + "MassiveIntentClassification (vie-Latn)": 31.47, + "MassiveIntentClassification (mya-Mymr)": 4.6, + "MassiveIntentClassification (jav-Latn)": 31.75, + "MassiveIntentClassification (cmo-Hans)": 24.36, + "MassiveIntentClassification (swa-Latn)": 31.82, + "MassiveIntentClassification (nob-Latn)": 39.3, + "MassiveIntentClassification (cmo-Hant)": 22.43, + "MassiveIntentClassification (ell-Grek)": 24.52, + "MassiveIntentClassification (deu-Latn)": 44.54, + "MassiveIntentClassification (tha-Thai)": 8.51, + "MassiveIntentClassification (dan-Latn)": 42.36, + "MassiveIntentClassification (afr-Latn)": 36.49, + "MassiveIntentClassification (spa-Latn)": 39.75, + "MassiveIntentClassification (kor-Kore)": 13.35, + "MassiveIntentClassification (slv-Latn)": 34.49, + "MassiveIntentClassification (khm-Khmr)": 4.76, + "MassiveScenarioClassification (en)": 75.67, + "MassiveScenarioClassification (kor-Kore)": 17.28, + "MassiveScenarioClassification (swe-Latn)": 44.53, + "MassiveScenarioClassification (hye-Armn)": 16.86, + "MassiveScenarioClassification (nob-Latn)": 45.75, + "MassiveScenarioClassification (pol-Latn)": 42.66, + "MassiveScenarioClassification (ind-Latn)": 43.05, + "MassiveScenarioClassification (ita-Latn)": 51.37, + "MassiveScenarioClassification (tgl-Latn)": 47.04, + "MassiveScenarioClassification (jav-Latn)": 40.0, + "MassiveScenarioClassification (lav-Latn)": 39.28, + "MassiveScenarioClassification (mya-Mymr)": 10.8, + "MassiveScenarioClassification (por-Latn)": 52.06, 
+ "MassiveScenarioClassification (tel-Telu)": 7.81, + "MassiveScenarioClassification (deu-Latn)": 54.09, + "MassiveScenarioClassification (fas-Arab)": 27.8, + "MassiveScenarioClassification (hin-Deva)": 23.13, + "MassiveScenarioClassification (hun-Latn)": 41.01, + "MassiveScenarioClassification (vie-Latn)": 35.9, + "MassiveScenarioClassification (fra-Latn)": 54.26, + "MassiveScenarioClassification (jpn-Jpan)": 40.57, + "MassiveScenarioClassification (tha-Thai)": 17.01, + "MassiveScenarioClassification (swa-Latn)": 40.34, + "MassiveScenarioClassification (ell-Grek)": 33.85, + "MassiveScenarioClassification (aze-Latn)": 36.42, + "MassiveScenarioClassification (heb-Hebr)": 25.49, + "MassiveScenarioClassification (kat-Geor)": 13.45, + "MassiveScenarioClassification (afr-Latn)": 43.63, + "MassiveScenarioClassification (ben-Beng)": 17.49, + "MassiveScenarioClassification (cym-Latn)": 34.82, + "MassiveScenarioClassification (mon-Cyrl)": 25.58, + "MassiveScenarioClassification (tur-Latn)": 39.11, + "MassiveScenarioClassification (tam-Taml)": 14.55, + "MassiveScenarioClassification (ara-Arab)": 27.8, + "MassiveScenarioClassification (msa-Latn)": 37.28, + "MassiveScenarioClassification (cmo-Hant)": 31.7, + "MassiveScenarioClassification (dan-Latn)": 49.45, + "MassiveScenarioClassification (kan-Knda)": 8.34, + "MassiveScenarioClassification (urd-Arab)": 20.0, + "MassiveScenarioClassification (cmo-Hans)": 35.33, + "MassiveScenarioClassification (amh-Ethi)": 7.43, + "MassiveScenarioClassification (ron-Latn)": 47.86, + "MassiveScenarioClassification (fin-Latn)": 38.41, + "MassiveScenarioClassification (isl-Latn)": 39.36, + "MassiveScenarioClassification (sqi-Latn)": 44.67, + "MassiveScenarioClassification (spa-Latn)": 50.92, + "MassiveScenarioClassification (mal-Mlym)": 7.69, + "MassiveScenarioClassification (slv-Latn)": 39.88, + "MassiveScenarioClassification (nld-Latn)": 47.79, + "MassiveScenarioClassification (khm-Khmr)": 9.63, + "MassiveScenarioClassification (rus-Cyrl)": 28.71, + "MultilingualSentiment (cmn-Hans)": 41.2, + "NoRecClassification (nob-Latn)": 38.34, + "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15, + "OnlineShopping (cmn-Hans)": 56.94, + "PAC (pol-Latn)": 62.1, + "PolEmo2.0-IN (pol-Latn)": 41.63, + "PolEmo2.0-OUT (pol-Latn)": 25.0, + "RuReviewsClassification (rus-Cyrl)": 42.33, + "RuSciBenchGRNTIClassification (rus-Cyrl)": 13.29, + "RuSciBenchOECDClassification (rus-Cyrl)": 10.62, + "TNews (cmn-Hans)": 21.05, + "ToxicConversationsClassification": 61.05, + "TweetSentimentExtractionClassification": 55.05, + "Waimai (cmn-Hans)": 63.31 } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-small-zh-v1.5", - "CLSClusteringP2P": 38.14, - "CLSClusteringS2S": 35.14, - "ThuNewsClusteringP2P": 54.22, - "ThuNewsClusteringS2S": 49.22 + "Model": "all-mpnet-base-v2", + "ArxivClusteringP2P": 48.38, + "ArxivClusteringS2S": 39.72, + "BiorxivClusteringP2P": 39.62, + "BiorxivClusteringS2S": 35.02, + "GeoreviewClusteringP2P (rus-Cyrl)": 20.33, + "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49, + "MasakhaNEWSClusteringP2P (eng)": 67.24, + "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99, + "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17, + "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0, + "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68, + "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96, + "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42, + "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01, + "MasakhaNEWSClusteringP2P (run-Latn)": 57.6, + "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99, + 
"MasakhaNEWSClusteringP2P (som-Latn)": 31.16, + "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29, + "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85, + "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24, + "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15, + "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48, + "MasakhaNEWSClusteringS2S (eng)": 35.69, + "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05, + "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64, + "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63, + "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72, + "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97, + "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85, + "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7, + "MasakhaNEWSClusteringS2S (run-Latn)": 52.27, + "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64, + "MasakhaNEWSClusteringS2S (som-Latn)": 30.94, + "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12, + "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01, + "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16, + "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04, + "MedrxivClusteringP2P": 35.58, + "MedrxivClusteringS2S": 32.87, + "RedditClustering": 54.82, + "RedditClusteringP2P": 56.77, + "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 14.66, + "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 12.49, + "StackExchangeClustering": 53.8, + "StackExchangeClusteringP2P": 34.28, + "TwentyNewsgroupsClustering": 49.74 } ] }, "PairClassification": { - "ap": [ + "ap": [], + "cosine_ap": [ { - "Model": "bge-small-zh-v1.5", - "Cmnli": 76.24, - "Ocnli": 64.57 + "Model": "all-mpnet-base-v2", + "CDSC-E (pol-Latn)": 45.37, + "OpusparcusPC (deu-Latn)": 89.78, + "OpusparcusPC (en)": 97.75, + "OpusparcusPC (fin-Latn)": 85.82, + "OpusparcusPC (fra-Latn)": 86.61, + "OpusparcusPC (rus-Cyrl)": 79.85, + "OpusparcusPC (swe-Latn)": 81.81, + "PSC (pol-Latn)": 83.28, + "PawsXPairClassification (deu-Latn)": 52.17, + "PawsXPairClassification (en)": 61.99, + "PawsXPairClassification (spa-Latn)": 55.06, + "PawsXPairClassification (fra-Latn)": 56.42, + "PawsXPairClassification (jpn-Hira)": 47.43, + "PawsXPairClassification (kor-Hang)": 49.75, + "PawsXPairClassification (cmn-Hans)": 52.47, + "SICK-E-PL (pol-Latn)": 46.51, + "SprintDuplicateQuestions": 90.15, + "TERRa (rus-Cyrl)": 44.52, + "TwitterSemEval2015": 73.85, + "TwitterURLCorpus": 85.11 } ] }, "Reranking": { "map": [ { - "Model": "bge-small-zh-v1.5", - "CMedQAv1": 77.4, - "CMedQAv2": 79.86, - "MMarcoReranking": 20.5, - "T2Reranking": 65.9 + "Model": "all-mpnet-base-v2", + "AlloprofReranking (fra-Latn)": 69.63, + "AskUbuntuDupQuestions": 65.85, + "MMarcoReranking (cmn-Hans)": 4.65, + "MindSmallReranking": 30.97, + "RuBQReranking (rus-Cyrl)": 30.96, + "SciDocsRR": 88.65, + "StackOverflowDupQuestions": 51.98, + "SyntecReranking (fra-Latn)": 66.12, + "T2Reranking (cmn-Hans)": 58.3 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-small-zh-v1.5", - "CmedqaRetrieval": 35.11, - "CovidRetrieval": 70.14, - "DuRetrieval": 77.28, - "EcomRetrieval": 55.71, - "MMarcoRetrieval": 63.48, - "MedicalRetrieval": 49.8, - "T2Retrieval": 76.43, - "VideoRetrieval": 66.19 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "bge-small-zh-v1.5", - "AFQMC": 33.42, - "ATEC": 43.01, - "BQ": 55.22, - "LCQMC": 72.19, - "PAWSX": 9.26, - "QBQTC": 35.29, - "STS22 (zh)": 67.72, - "STSB": 76.73 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "bge-small-zh-v1.5" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "bge-small-zh-v1.5" - } - ] - } - }, - "text-embedding-3-large": { - "BitextMining": { - "f1": [ - { - "Model": "text-embedding-3-large" - 
} - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "text-embedding-3-large", - "AmazonCounterfactualClassification (en)": 78.93, - "AmazonPolarityClassification": 92.85, - "AmazonReviewsClassification (en)": 48.7, - "Banking77Classification": 85.69, - "EmotionClassification": 51.58, - "ImdbClassification": 87.67, - "MTOPDomainClassification (en)": 95.36, - "MTOPIntentClassification (en)": 75.07, - "MassiveIntentClassification (en)": 74.64, - "MassiveScenarioClassification (en)": 79.79, - "ToxicConversationsClassification": 72.92, - "TweetSentimentExtractionClassification": 62.22 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "text-embedding-3-large", - "ArxivClusteringP2P": 49.01, - "ArxivClusteringS2S": 44.45, - "BiorxivClusteringP2P": 38.03, - "BiorxivClusteringS2S": 36.53, - "MedrxivClusteringP2P": 32.7, - "MedrxivClusteringS2S": 31.27, - "RedditClustering": 67.84, - "RedditClusteringP2P": 67.96, - "StackExchangeClustering": 76.26, - "StackExchangeClusteringP2P": 36.88, - "TwentyNewsgroupsClustering": 58.14 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "text-embedding-3-large", - "SprintDuplicateQuestions": 92.25, - "TwitterSemEval2015": 77.13, - "TwitterURLCorpus": 87.78 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "text-embedding-3-large", - "AskUbuntuDupQuestions": 65.03, - "MindSmallReranking": 29.86, - "SciDocsRR": 86.66, - "StackOverflowDupQuestions": 55.08 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ + "Model": "all-mpnet-base-v2", + "AILACasedocs": 22.51, + "AILAStatutes": 21.27, + "ARCChallenge": 11.8, + "AlloprofRetrieval (fra-Latn)": 34.27, + "AlphaNLI": 22.41, + "AppsRetrieval (eng-Latn_python-Code)": 8.41, + "ArguAna": 46.52, + "ArguAna-PL (pol-Latn)": 14.72, + "BSARDRetrieval (fra-Latn)": 6.98, + "CQADupstackRetrieval": 44.96, + "ClimateFEVER": 21.97, + "CmedqaRetrieval (cmn-Hans)": 2.0, + "CodeFeedbackMT (c-Code_sql-Code_python-Code_shell-Code_swift-Code_eng-Latn)": 37.72, + "CodeFeedbackST (python-Code_javascript-Code_go-Code_ruby-Code_java-Code_php-Code_eng-Latn)": 69.32, + "CodeSearchNetCCRetrieval (python-Code)": 71.83, + "CodeSearchNetCCRetrieval (javascript-Code)": 67.99, + "CodeSearchNetCCRetrieval (go-Code)": 61.44, + "CodeSearchNetCCRetrieval (ruby-Code)": 66.42, + "CodeSearchNetCCRetrieval (java-Code)": 68.88, + "CodeSearchNetCCRetrieval (php-Code)": 59.22, + "CodeSearchNetRetrieval (python-Code)": 81.01, + "CodeSearchNetRetrieval (javascript-Code)": 65.58, + "CodeSearchNetRetrieval (go-Code)": 88.25, + "CodeSearchNetRetrieval (ruby-Code)": 74.53, + "CodeSearchNetRetrieval (java-Code)": 65.11, + "CodeSearchNetRetrieval (php-Code)": 74.47, + "CodeTransOceanContest (python-Code_c++-Code)": 70.58, + "CodeTransOceanDL": 29.45, + "CosQA (eng-Latn_python-Code)": 33.71, + "CovidRetrieval (cmn-Hans)": 3.7, + "DBPedia": 32.09, + "DuRetrieval (cmn-Hans)": 4.92, + "EcomRetrieval (cmn-Hans)": 3.94, + "FEVER": 50.86, + "FiQA-PL (pol-Latn)": 3.6, + "FiQA2018": 49.96, + "GerDaLIRSmall (deu-Latn)": 3.78, + "HellaSwag": 26.27, + "HotpotQA": 39.29, + "LEMBNarrativeQARetrieval": 19.34, + "LEMBQMSumRetrieval": 21.54, + "LEMBSummScreenFDRetrieval": 60.43, + "LEMBWikimQARetrieval": 44.92, + "LeCaRDv2 (zho-Hans)": 18.09, + "LegalBenchConsumerContractsQA": 75.25, + "LegalBenchCorporateLobbying": 89.04, + "LegalQuAD (deu-Latn)": 10.67, + "LegalSummarization": 58.55, + "MMarcoRetrieval (cmn-Hans)": 7.13, + "MSMARCO": 39.75, + "MedicalRetrieval (cmn-Hans)": 1.71, + "MintakaRetrieval (ara-Arab)": 1.97, + "MintakaRetrieval 
(deu-Latn)": 17.21, + "MintakaRetrieval (spa-Latn)": 10.11, + "MintakaRetrieval (fra-Latn)": 12.93, + "MintakaRetrieval (hin-Deva)": 2.03, + "MintakaRetrieval (ita-Latn)": 5.63, + "MintakaRetrieval (jpn-Hira)": 6.77, + "MintakaRetrieval (por-Latn)": 8.05, + "NFCorpus": 33.29, + "NFCorpus-PL (pol-Latn)": 8.77, + "NQ": 50.45, + "PIQA": 29.03, + "Quail": 3.41, + "QuoraRetrieval": 87.46, + "RARbCode": 53.21, + "RARbMath": 71.85, + "RuBQRetrieval (rus-Cyrl)": 4.75, + "SCIDOCS": 23.76, + "SCIDOCS-PL (pol-Latn)": 4.02, + "SIQA": 2.38, + "SciFact": 65.57, + "SciFact-PL (pol-Latn)": 13.31, + "SpartQA": 0.22, + "StackOverflowQA": 90.32, + "SyntecRetrieval (fra-Latn)": 57.39, + "SyntheticText2SQL (eng-Latn_sql-Code)": 45.09, + "T2Retrieval (cmn-Hans)": 2.98, + "TRECCOVID": 51.33, + "TRECCOVID-PL (pol-Latn)": 12.11, + "TempReasonL1": 1.77, + "TempReasonL2Fact": 11.2, + "TempReasonL2Pure": 1.15, + "TempReasonL3Fact": 9.42, + "TempReasonL3Pure": 5.59, + "Touche2020": 19.93, + "VideoRetrieval (cmn-Hans)": 8.48, + "WinoGrande": 20.77, + "XPQARetrieval (ara-Arab_ara-Arab)": 9.42, + "XPQARetrieval (eng-Latn_ara-Arab)": 2.36, + "XPQARetrieval (ara-Arab_eng-Latn)": 8.98, + "XPQARetrieval (deu-Latn_deu-Latn)": 55.82, + "XPQARetrieval (eng-Latn_deu-Latn)": 11.74, + "XPQARetrieval (deu-Latn_eng-Latn)": 30.44, + "XPQARetrieval (spa-Latn_spa-Latn)": 40.01, + "XPQARetrieval (eng-Latn_spa-Latn)": 6.12, + "XPQARetrieval (spa-Latn_eng-Latn)": 29.44, + "XPQARetrieval (fra-Latn_fra-Latn)": 51.94, + "XPQARetrieval (eng-Latn_fra-Latn)": 11.48, + "XPQARetrieval (fra-Latn_eng-Latn)": 32.52, + "XPQARetrieval (hin-Deva_hin-Deva)": 37.45, + "XPQARetrieval (eng-Latn_hin-Deva)": 5.11, + "XPQARetrieval (hin-Deva_eng-Latn)": 7.37, + "XPQARetrieval (ita-Latn_ita-Latn)": 54.2, + "XPQARetrieval (eng-Latn_ita-Latn)": 6.08, + "XPQARetrieval (ita-Latn_eng-Latn)": 30.32, + "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.46, + "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79, + "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77, + "XPQARetrieval (kor-Hang_kor-Hang)": 10.39, + "XPQARetrieval (eng-Latn_kor-Hang)": 7.09, + "XPQARetrieval (kor-Hang_eng-Latn)": 6.96, + "XPQARetrieval (pol-Latn_pol-Latn)": 23.71, + "XPQARetrieval (eng-Latn_pol-Latn)": 8.83, + "XPQARetrieval (pol-Latn_eng-Latn)": 15.94, + "XPQARetrieval (por-Latn_por-Latn)": 33.56, + "XPQARetrieval (eng-Latn_por-Latn)": 3.76, + "XPQARetrieval (por-Latn_eng-Latn)": 23.45, + "XPQARetrieval (tam-Taml_tam-Taml)": 5.5, + "XPQARetrieval (eng-Latn_tam-Taml)": 3.3, + "XPQARetrieval (tam-Taml_eng-Latn)": 4.18, + "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.8, + "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2, + "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84 + }, { - "Model": "text-embedding-3-large", - "AILACasedocs": 39.0, - "AILAStatutes": 41.31, - "ARCChallenge": 23.98, - "AlphaNLI": 37.27, - "ArguAna": 58.05, - "BrightRetrieval (theoremqa_questions)": 22.22, - "BrightRetrieval (leetcode)": 23.65, - "BrightRetrieval (earth_science)": 26.27, - "BrightRetrieval (psychology)": 27.52, - "BrightRetrieval (robotics)": 12.93, - "BrightRetrieval (economics)": 19.98, - "BrightRetrieval (stackoverflow)": 12.49, - "BrightRetrieval (biology)": 23.67, - "BrightRetrieval (theoremqa_theorems)": 9.25, - "BrightRetrieval (pony)": 2.45, - "BrightRetrieval (sustainable_living)": 20.32, - "BrightRetrieval (aops)": 8.45, - "CQADupstackRetrieval": 47.54, - "ClimateFEVER": 30.27, - "DBPedia": 44.76, - "FEVER": 87.94, - "FiQA2018": 55.0, - "GerDaLIRSmall": 32.77, - "HellaSwag": 34.12, - "HotpotQA": 71.58, - "LEMBNarrativeQARetrieval": 44.09, - 
"LEMBNeedleRetrieval": 29.25, - "LEMBPasskeyRetrieval": 63.0, - "LEMBQMSumRetrieval": 32.49, - "LEMBSummScreenFDRetrieval": 84.8, - "LEMBWikimQARetrieval": 54.16, - "LeCaRDv2": 57.2, - "LegalBenchConsumerContractsQA": 79.39, - "LegalBenchCorporateLobbying": 95.09, - "LegalQuAD": 57.47, - "LegalSummarization": 71.55, - "MSMARCO": 40.24, - "NFCorpus": 42.07, - "NQ": 61.27, - "PIQA": 41.96, - "Quail": 10.15, - "QuoraRetrieval": 89.05, - "RARbCode": 89.64, - "RARbMath": 90.08, - "SCIDOCS": 23.11, - "SIQA": 3.44, - "SciFact": 77.77, - "SpartQA": 7.51, - "TRECCOVID": 79.56, - "TempReasonL1": 2.13, - "TempReasonL2Fact": 28.65, - "TempReasonL2Pure": 10.34, - "TempReasonL3Fact": 25.52, - "TempReasonL3Pure": 15.28, - "Touche2020": 23.35, - "WinoGrande": 29.11 + "Model": "all-mpnet-base-v2", + "LEMBNeedleRetrieval": 16.0, + "LEMBPasskeyRetrieval": 24.5 } ] }, "STS": { - "spearman": [ + "spearman": [], + "cosine_spearman": [ { - "Model": "text-embedding-3-large", - "BIOSSES": 84.68, - "SICK-R": 79.0, - "STS12": 72.84, - "STS13": 86.1, - "STS14": 81.15, - "STS15": 88.49, - "STS16": 85.08, - "STS17 (en-en)": 90.22, - "STS22 (en)": 66.14, - "STSBenchmark": 83.56 + "Model": "all-mpnet-base-v2", + "AFQMC (cmn-Hans)": 8.01, + "ATEC (cmn-Hans)": 14.03, + "BIOSSES": 80.43, + "BQ (cmn-Hans)": 21.39, + "CDSC-R (pol-Latn)": 77.04, + "LCQMC (cmn-Hans)": 22.84, + "PAWSX (cmn-Hans)": 6.44, + "RUParaPhraserSTS (rus-Cyrl)": 42.15, + "RuSTSBenchmarkSTS (rus-Cyrl)": 55.68, + "SICK-R": 80.59, + "SICK-R-PL (pol-Latn)": 50.2, + "SICKFr (fra-Latn)": 67.05, + "STS12": 72.63, + "STS13": 83.48, + "STS14": 78.0, + "STS15": 85.66, + "STS16": 80.03, + "STS17 (fra-Latn_eng-Latn)": 41.64, + "STS17 (nld-Latn_eng-Latn)": 32.89, + "STS17 (spa-Latn_eng-Latn)": 25.28, + "STS17 (en-en)": 90.6, + "STS17 (kor-Hang)": 39.11, + "STS17 (ara-Arab)": 55.42, + "STS17 (spa-Latn)": 78.4, + "STS17 (eng-Latn_deu-Latn)": 35.5, + "STS17 (eng-Latn_ara-Arab)": 6.76, + "STS17 (eng-Latn_tur-Latn)": -4.58, + "STS17 (ita-Latn_eng-Latn)": 31.8, + "STS22 (pol-Latn)": 24.21, + "STS22 (ita-Latn)": 58.02, + "STS22 (spa-Latn_eng-Latn)": 55.09, + "STS22 (fra-Latn)": 77.1, + "STS22 (tur-Latn)": 29.35, + "STS22 (cmn-Hans)": 42.24, + "STS22 (deu-Latn)": 27.0, + "STS22 (spa-Latn_ita-Latn)": 41.61, + "STS22 (fra-Latn_pol-Latn)": 73.25, + "STS22 (deu-Latn_eng-Latn)": 49.73, + "STS22 (cmn-Hans_eng-Latn)": 40.47, + "STS22 (spa-Latn)": 55.98, + "STS22 (ara-Arab)": 38.96, + "STS22 (en)": 68.39, + "STS22 (deu-Latn_pol-Latn)": 23.53, + "STS22 (rus-Cyrl)": 15.83, + "STS22 (pol-Latn_eng-Latn)": 51.07, + "STS22 (deu-Latn_fra-Latn)": 31.39, + "STSB (cmn-Hans)": 37.7, + "STSBenchmark": 83.42, + "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54, + "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15, + "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72, + "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78, + "STSBenchmarkMultilingualSTS (en)": 83.42, + "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43, + "STSBenchmarkMultilingualSTS (por-Latn)": 62.12, + "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43, + "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36 + }, + { + "Model": "all-mpnet-base-v2", + "STS17 (en-en)": 90.6, + "STS22 (en)": 67.95 } ] }, "Summarization": { - "spearman": [ + "spearman": [], + "cosine_spearman": [ { - "Model": "text-embedding-3-large", - "SummEval": 29.92 + "Model": "all-mpnet-base-v2", + "SummEval": 27.49, + "SummEvalFr (fra-Latn)": 28.11 + }, + { + "Model": "all-mpnet-base-v2", + "SummEval": 27.49, + "SummEvalFr 
(fra-Latn)": 28.11 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "text-embedding-3-large", - "Core17InstructionRetrieval": -0.2, - "News21InstructionRetrieval": -2.03, - "Robust04InstructionRetrieval": -5.81 + "Model": "all-mpnet-base-v2", + "Core17InstructionRetrieval": -0.74, + "News21InstructionRetrieval": -1.79, + "Robust04InstructionRetrieval": -6.71 } ] } }, - "nb-bert-large": { + "instructor-xl": { "BitextMining": { - "f1": [ - { - "Model": "nb-bert-large", - "BornholmBitextMining": 4.53 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "nb-bert-large", - "AngryTweetsClassification": 52.14, - "DKHateClassification": 62.13, - "DanishPoliticalCommentsClassification": 35.04, - "LccSentimentClassification": 56.27, - "MassiveIntentClassification (da)": 57.03, - "MassiveIntentClassification (nb)": 62.68, - "MassiveIntentClassification (sv)": 55.02, - "MassiveScenarioClassification (da)": 60.43, - "MassiveScenarioClassification (nb)": 67.44, - "MassiveScenarioClassification (sv)": 57.12, - "NoRecClassification": 55.46, - "NordicLangClassification": 85.27, - "NorwegianParliament": 62.58, - "ScalaDaClassification": 62.85, - "ScalaNbClassification": 66.97 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "nb-bert-large" - } - ] + "v_measure": [] }, "PairClassification": { - "ap": [ - { - "Model": "nb-bert-large" - } - ] + "ap": [] }, "Reranking": { - "map": [ + "map": [] + }, + "Retrieval": { + "ndcg_at_10": [ { - "Model": "nb-bert-large" + "Model": "instructor-xl", + "BrightRetrieval (aops)": 8.26, + "BrightRetrieval (robotics)": 17.39, + "BrightRetrieval (economics)": 22.81, + "BrightRetrieval (stackoverflow)": 19.06, + "BrightRetrieval (leetcode)": 27.5, + "BrightRetrieval (theoremqa_questions)": 14.59, + "BrightRetrieval (psychology)": 27.43, + "BrightRetrieval (biology)": 21.91, + "BrightRetrieval (theoremqa_theorems)": 6.22, + "BrightRetrieval (earth_science)": 34.35, + "BrightRetrieval (sustainable_living)": 18.82, + "BrightRetrieval (pony)": 5.02 } - ] - }, - "Retrieval": { - "ndcg_at_10": [ + ], + "recall_at_1": [ { - "Model": "nb-bert-large" + "Model": "instructor-xl", + "BrightRetrieval (stackoverflow)": 14.96, + "BrightRetrieval (biology)": 22.01, + "BrightRetrieval (sustainable_living)": 20.14, + "BrightRetrieval (pony)": 5.93, + "BrightRetrieval (psychology)": 20.5, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (economics)": 14.08, + "BrightRetrieval (earth_science)": 32.04 } ] }, "STS": { - "spearman": [ - { - "Model": "nb-bert-large" - } - ] + "spearman": [] }, "Summarization": { - "spearman": [ - { - "Model": "nb-bert-large" - } - ] + "spearman": [] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "nb-bert-large" + "Model": "instructor-xl", + "Core17InstructionRetrieval": 0.69, + "News21InstructionRetrieval": -0.9, + "Robust04InstructionRetrieval": -8.08 } ] } }, - "e5-large-v2": { + "google-gecko.text-embedding-preview-0409": { "BitextMining": { - "f1": [ - { - "Model": "e5-large-v2" - } - ] + "f1": [] }, "Classification": { "accuracy": [ { - "Model": "e5-large-v2" + "Model": "google-gecko.text-embedding-preview-0409", + "AmazonCounterfactualClassification (en)": 75.34, + "AmazonPolarityClassification": 97.34, + "AmazonReviewsClassification (en)": 51.17, + "Banking77Classification": 88.62, + "EmotionClassification": 52.51, + "ImdbClassification": 95.65, + "MTOPDomainClassification (en)": 98.35, + "MTOPIntentClassification (en)": 83.43, + "MassiveIntentClassification (en)": 80.22, + 
"MassiveScenarioClassification (en)": 87.19, + "ToxicConversationsClassification": 89.67, + "TweetSentimentExtractionClassification": 74.52 } ] }, "Clustering": { "v_measure": [ { - "Model": "e5-large-v2", - "BiorxivClusteringP2P": 36.72, - "BiorxivClusteringS2S": 35.47, - "MedrxivClusteringP2P": 31.45, - "MedrxivClusteringS2S": 29.91, - "RedditClustering": 55.5, - "RedditClusteringP2P": 63.71, - "StackExchangeClustering": 65.23, - "StackExchangeClusteringP2P": 33.62, - "TwentyNewsgroupsClustering": 48.73 + "Model": "google-gecko.text-embedding-preview-0409", + "ArxivClusteringP2P": 46.27, + "ArxivClusteringS2S": 38.36, + "BiorxivClusteringP2P": 37.87, + "BiorxivClusteringS2S": 35.67, + "MedrxivClusteringP2P": 33.11, + "MedrxivClusteringS2S": 31.54, + "RedditClustering": 65.81, + "RedditClusteringP2P": 66.62, + "StackExchangeClustering": 74.52, + "StackExchangeClusteringP2P": 37.63, + "TwentyNewsgroupsClustering": 54.87 } ] }, "PairClassification": { - "ap": [ + "ap": [], + "cos_sim_ap": [ { - "Model": "e5-large-v2" + "Model": "google-gecko.text-embedding-preview-0409", + "SprintDuplicateQuestions": 96.26, + "TwitterSemEval2015": 79.04, + "TwitterURLCorpus": 87.53 } ] }, "Reranking": { "map": [ { - "Model": "e5-large-v2" + "Model": "google-gecko.text-embedding-preview-0409", + "AskUbuntuDupQuestions": 64.4, + "MindSmallReranking": 33.07, + "SciDocsRR": 83.59, + "StackOverflowDupQuestions": 54.56 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "e5-large-v2" + "Model": "google-gecko.text-embedding-preview-0409", + "ArguAna": 62.18, + "BrightRetrieval (earth_science)": 34.38, + "BrightRetrieval (leetcode)": 29.64, + "BrightRetrieval (theoremqa_questions)": 21.51, + "BrightRetrieval (aops)": 9.33, + "BrightRetrieval (sustainable_living)": 17.25, + "BrightRetrieval (pony)": 3.59, + "BrightRetrieval (theoremqa_theorems)": 14.31, + "BrightRetrieval (stackoverflow)": 17.93, + "BrightRetrieval (biology)": 22.98, + "BrightRetrieval (robotics)": 15.98, + "BrightRetrieval (economics)": 19.5, + "BrightRetrieval (psychology)": 27.86, + "CQADupstackRetrieval": 48.89, + "ClimateFEVER": 33.21, + "DBPedia": 47.12, + "FEVER": 86.96, + "FiQA2018": 59.24, + "HotpotQA": 71.33, + "MSMARCO": 32.58, + "NFCorpus": 40.33, + "NQ": 61.28, + "QuoraRetrieval": 88.18, + "SCIDOCS": 20.34, + "SciFact": 75.42, + "TRECCOVID": 82.62, + "Touche2020": 25.86 + } + ], + "recall_at_1": [ + { + "Model": "google-gecko.text-embedding-preview-0409", + "BrightRetrieval (economics)": 21.84, + "BrightRetrieval (stackoverflow)": 19.23, + "BrightRetrieval (pony)": 0.29, + "BrightRetrieval (earth_science)": 38.0, + "BrightRetrieval (sustainable_living)": 25.65, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (psychology)": 30.69, + "BrightRetrieval (biology)": 30.91 } ] }, "STS": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "e5-large-v2" + "Model": "google-gecko.text-embedding-preview-0409", + "BIOSSES": 89.46, + "SICK-R": 81.93, + "STS12": 77.59, + "STS13": 90.36, + "STS14": 85.25, + "STS15": 89.66, + "STS16": 87.34, + "STS17 (en-en)": 92.06, + "STS22 (en)": 68.02, + "STSBenchmark": 88.99 } ] }, "Summarization": { - "spearman": [ + "spearman": [], + "cos_sim_spearman": [ { - "Model": "e5-large-v2" + "Model": "google-gecko.text-embedding-preview-0409", + "SummEval": 32.63 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "e5-large-v2", - "Core17InstructionRetrieval": 0.12, - "News21InstructionRetrieval": 0.87, - "Robust04InstructionRetrieval": -4.16 + "Model": 
"google-gecko.text-embedding-preview-0409", + "Core17InstructionRetrieval": 5.44, + "News21InstructionRetrieval": 3.94, + "Robust04InstructionRetrieval": -2.4 } ] } }, - "universal-sentence-encoder-multilingual-3": { + "instructor-large": { "BitextMining": { - "f1": [ - { - "Model": "universal-sentence-encoder-multilingual-3" - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "universal-sentence-encoder-multilingual-3", - "AmazonReviewsClassification (fr)": 33.51, - "MTOPDomainClassification (fr)": 85.5, - "MTOPIntentClassification (fr)": 53.98, - "MasakhaNEWSClassification (fra)": 82.06, - "MassiveIntentClassification (fr)": 61.19, - "MassiveScenarioClassification (fr)": 70.22 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "universal-sentence-encoder-multilingual-3", - "AlloProfClusteringP2P": 56.9, - "AlloProfClusteringS2S": 37.84, - "HALClusteringS2S": 18.95, - "MLSUMClusteringP2P": 43.9, - "MLSUMClusteringS2S": 35.5, - "MasakhaNEWSClusteringP2P (fra)": 60.57, - "MasakhaNEWSClusteringS2S (fra)": 40.31 - } - ] + "v_measure": [] }, "PairClassification": { - "ap": [ - { - "Model": "universal-sentence-encoder-multilingual-3", - "OpusparcusPC (fr)": 91.46, - "PawsXPairClassification (fr)": 52.39 - } - ] + "ap": [] }, "Reranking": { - "map": [ - { - "Model": "universal-sentence-encoder-multilingual-3", - "AlloprofReranking": 56.23, - "SyntecReranking": 73.85 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "AlloprofRetrieval": 35.27, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 26.12, - "SyntecRetrieval": 69.82, - "XPQARetrieval (fr)": 59.59 + "Model": "instructor-large", + "BrightRetrieval (pony)": 1.32, + "BrightRetrieval (sustainable_living)": 13.16, + "BrightRetrieval (aops)": 7.94, + "BrightRetrieval (biology)": 15.61, + "BrightRetrieval (stackoverflow)": 11.21, + "BrightRetrieval (theoremqa_theorems)": 8.27, + "BrightRetrieval (psychology)": 21.94, + "BrightRetrieval (economics)": 15.99, + "BrightRetrieval (robotics)": 11.45, + "BrightRetrieval (leetcode)": 20.0, + "BrightRetrieval (earth_science)": 21.52, + "BrightRetrieval (theoremqa_questions)": 20.07 } - ] - }, - "STS": { - "spearman": [ + ], + "recall_at_1": [ { - "Model": "universal-sentence-encoder-multilingual-3", - "SICKFr": 71.37, - "STS22 (fr)": 77.91, - "STSBenchmarkMultilingualSTS (fr)": 75.48 + "Model": "instructor-large", + "BrightRetrieval (stackoverflow)": 14.53, + "BrightRetrieval (pony)": 3.94, + "BrightRetrieval (economics)": 14.08, + "BrightRetrieval (earth_science)": 29.45, + "BrightRetrieval (sustainable_living)": 25.42, + "BrightRetrieval (psychology)": 21.29, + "BrightRetrieval (robotics)": 12.87, + "BrightRetrieval (biology)": 24.11 } ] }, + "STS": { + "spearman": [] + }, "Summarization": { - "spearman": [ - { - "Model": "universal-sentence-encoder-multilingual-3", - "SummEvalFr": 28.21 - } - ] + "spearman": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "universal-sentence-encoder-multilingual-3" - } - ] + "p-MRR": [] } }, - "LaBSE-ru-turbo": { + "bge-large-en-v1.5": { "BitextMining": { - "f1": [ - { - "Model": "LaBSE-ru-turbo", - "Tatoeba (rus-Cyrl_eng-Latn)": 93.22 - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "LaBSE-ru-turbo", - "GeoreviewClassification (rus-Cyrl)": 46.04, - "HeadlineClassification (rus-Cyrl)": 69.98, - "InappropriatenessClassification (rus-Cyrl)": 61.39, - "KinopoiskClassification (rus-Cyrl)": 53.59, - 
"MassiveIntentClassification (rus-Cyrl)": 66.08, - "MassiveScenarioClassification (rus-Cyrl)": 71.13, - "RuReviewsClassification (rus-Cyrl)": 64.58, - "RuSciBenchGRNTIClassification (rus-Cyrl)": 56.67, - "RuSciBenchOECDClassification (rus-Cyrl)": 43.58 - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "LaBSE-ru-turbo", - "GeoreviewClusteringP2P (rus-Cyrl)": 64.55, - "MLSUMClusteringP2P (rus-Cyrl)": 45.7, - "MLSUMClusteringS2S (rus-Cyrl)": 42.93, - "RuSciBenchGRNTIClusteringP2P (rus-Cyrl)": 50.64, - "RuSciBenchOECDClusteringP2P (rus-Cyrl)": 44.48 - } - ] + "v_measure": [] }, - "PairClassification": { - "ap": [ - { - "Model": "LaBSE-ru-turbo", - "OpusparcusPC (rus-Cyrl)": 89.32, - "TERRa (rus-Cyrl)": 57.81 - } - ] + "PairClassification": { + "ap": [] }, "Reranking": { - "map": [ - { - "Model": "LaBSE-ru-turbo", - "RuBQReranking (rus-Cyrl)": 68.65 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "LaBSE-ru-turbo", - "RiaNewsRetrieval (rus-Cyrl)": 69.36, - "RuBQRetrieval (rus-Cyrl)": 65.71 + "Model": "bge-large-en-v1.5", + "AILACasedocs": 25.15, + "AILAStatutes": 20.74, + "ARCChallenge": 9.99, + "AlphaNLI": 13.13, + "BrightRetrieval (stackoverflow)": 9.51, + "BrightRetrieval (earth_science)": 24.15, + "BrightRetrieval (aops)": 6.08, + "BrightRetrieval (sustainable_living)": 13.27, + "BrightRetrieval (psychology)": 17.44, + "BrightRetrieval (robotics)": 12.21, + "BrightRetrieval (theoremqa_theorems)": 6.72, + "BrightRetrieval (pony)": 5.64, + "BrightRetrieval (biology)": 11.96, + "BrightRetrieval (theoremqa_questions)": 12.56, + "BrightRetrieval (leetcode)": 26.68, + "BrightRetrieval (economics)": 16.59, + "GerDaLIRSmall": 3.96, + "HellaSwag": 28.5, + "LeCaRDv2": 22.68, + "LegalBenchConsumerContractsQA": 73.52, + "LegalBenchCorporateLobbying": 91.51, + "LegalQuAD": 16.22, + "LegalSummarization": 59.99, + "PIQA": 27.99, + "Quail": 1.83, + "RARbCode": 48.12, + "RARbMath": 57.36, + "SIQA": 1.04, + "SpartQA": 2.99, + "TempReasonL1": 1.46, + "TempReasonL2Fact": 24.25, + "TempReasonL2Pure": 2.35, + "TempReasonL3Fact": 20.64, + "TempReasonL3Pure": 6.67, + "WinoGrande": 19.18 } - ] - }, - "STS": { - "spearman": [ + ], + "recall_at_1": [ { - "Model": "LaBSE-ru-turbo", - "RUParaPhraserSTS (rus-Cyrl)": 72.97, - "RuSTSBenchmarkSTS (rus-Cyrl)": 81.77, - "STS22 (rus-Cyrl)": 62.89, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 81.81 + "Model": "bge-large-en-v1.5", + "BrightRetrieval (pony)": 0.36, + "BrightRetrieval (psychology)": 11.58, + "BrightRetrieval (stackoverflow)": 13.25, + "BrightRetrieval (robotics)": 10.89, + "BrightRetrieval (earth_science)": 27.73, + "BrightRetrieval (biology)": 16.42, + "BrightRetrieval (economics)": 20.87, + "BrightRetrieval (sustainable_living)": 16.9 } ] }, + "STS": { + "spearman": [] + }, "Summarization": { - "spearman": [ - { - "Model": "LaBSE-ru-turbo" - } - ] + "spearman": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "LaBSE-ru-turbo" - } - ] + "p-MRR": [] } }, - "bge-large-zh-noinstruct": { + "SFR-Embedding-Mistral": { "BitextMining": { - "f1": [ - { - "Model": "bge-large-zh-noinstruct" - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "bge-large-zh-noinstruct", - "AmazonReviewsClassification (zh)": 41.94, - "IFlyTek": 45.32, - "JDReview": 85.38, - "MassiveIntentClassification (zh-CN)": 66.96, - "MassiveScenarioClassification (zh-CN)": 73.39, - "MultilingualSentiment": 73.7, - "OnlineShopping": 91.66, - "TNews": 52.05, - "Waimai": 86.83 - } - ] + "accuracy": [] }, "Clustering": { - 
"v_measure": [ - { - "Model": "bge-large-zh-noinstruct", - "CLSClusteringP2P": 41.23, - "CLSClusteringS2S": 40.04, - "ThuNewsClusteringP2P": 62.03, - "ThuNewsClusteringS2S": 56.75 - } - ] + "v_measure": [] }, "PairClassification": { - "ap": [ - { - "Model": "bge-large-zh-noinstruct", - "Cmnli": 82.17, - "Ocnli": 71.37 - } - ] + "ap": [] }, "Reranking": { - "map": [ - { - "Model": "bge-large-zh-noinstruct", - "CMedQAv1": 81.72, - "CMedQAv2": 84.64, - "MMarcoReranking": 27.1, - "T2Reranking": 66.16 - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-large-zh-noinstruct", - "CmedqaRetrieval": 41.03, - "CovidRetrieval": 75.07, - "DuRetrieval": 84.68, - "EcomRetrieval": 65.6, - "MMarcoRetrieval": 81.38, - "MedicalRetrieval": 58.28, - "T2Retrieval": 84.39, - "VideoRetrieval": 73.93 + "Model": "SFR-Embedding-Mistral", + "BrightRetrieval (sustainable_living)": 19.79, + "BrightRetrieval (economics)": 17.84, + "BrightRetrieval (theoremqa_theorems)": 24.32, + "BrightRetrieval (aops)": 7.43, + "BrightRetrieval (theoremqa_questions)": 23.05, + "BrightRetrieval (psychology)": 18.97, + "BrightRetrieval (stackoverflow)": 12.72, + "BrightRetrieval (pony)": 1.97, + "BrightRetrieval (leetcode)": 27.35, + "BrightRetrieval (biology)": 19.49, + "BrightRetrieval (earth_science)": 26.63, + "BrightRetrieval (robotics)": 16.7 } - ] - }, - "STS": { - "spearman": [ + ], + "recall_at_1": [ { - "Model": "bge-large-zh-noinstruct", - "AFQMC": 43.06, - "ATEC": 48.29, - "BQ": 60.53, - "LCQMC": 74.71, - "PAWSX": 16.64, - "QBQTC": 35.2, - "STS22 (zh)": 67.19, - "STSB": 78.41 + "Model": "SFR-Embedding-Mistral", + "BrightRetrieval (earth_science)": 37.0, + "BrightRetrieval (biology)": 30.26, + "BrightRetrieval (stackoverflow)": 14.53, + "BrightRetrieval (sustainable_living)": 34.99, + "BrightRetrieval (psychology)": 47.72, + "BrightRetrieval (pony)": 2.0, + "BrightRetrieval (economics)": 24.27, + "BrightRetrieval (robotics)": 17.33 } ] }, + "STS": { + "spearman": [] + }, "Summarization": { - "spearman": [ - { - "Model": "bge-large-zh-noinstruct" - } - ] + "spearman": [] }, "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "bge-large-zh-noinstruct" - } - ] + "p-MRR": [] } }, - "bm25s": { + "Cohere-embed-english-v3.0": { "BitextMining": { - "f1": [ - { - "Model": "bm25s" - } - ] + "f1": [] }, "Classification": { - "accuracy": [ - { - "Model": "bm25s" - } - ] + "accuracy": [] }, "Clustering": { - "v_measure": [ - { - "Model": "bm25s" - } - ] + "v_measure": [] }, "PairClassification": { - "ap": [ - { - "Model": "bm25s" - } - ] + "ap": [] }, "Reranking": { - "map": [ - { - "Model": "bm25s" - } - ] + "map": [] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bm25s", - "ArguAna": 49.28, - "CQADupstackRetrieval": 31.86, - "ClimateFEVER": 13.62, - "DBPedia": 29.91, - "FEVER": 48.09, - "FiQA2018": 25.14, - "HotpotQA": 56.91, - "MSMARCO": 21.89, - "NFCorpus": 32.08, - "NQ": 28.5, - "QuoraRetrieval": 80.42, - "SCIDOCS": 15.78, - "SciFact": 68.7, - "TRECCOVID": 62.31, - "Touche2020": 33.05 + "Model": "Cohere-embed-english-v3.0", + "AILACasedocs": 31.54, + "AILAStatutes": 27.15, + "ARCChallenge": 9.89, + "AlphaNLI": 15.1, + "BrightRetrieval (psychology)": 21.82, + "BrightRetrieval (economics)": 20.18, + "BrightRetrieval (robotics)": 16.21, + "BrightRetrieval (biology)": 18.98, + "BrightRetrieval (stackoverflow)": 16.47, + "BrightRetrieval (theoremqa_theorems)": 7.14, + "BrightRetrieval (pony)": 1.77, + "BrightRetrieval (sustainable_living)": 17.69, + "BrightRetrieval (aops)": 6.46, + "BrightRetrieval 
(theoremqa_questions)": 15.07, + "BrightRetrieval (leetcode)": 26.78, + "BrightRetrieval (earth_science)": 27.45, + "GerDaLIRSmall": 6.05, + "HellaSwag": 26.35, + "LeCaRDv2": 21.02, + "LegalBenchConsumerContractsQA": 77.12, + "LegalBenchCorporateLobbying": 93.68, + "LegalQuAD": 26.08, + "LegalSummarization": 61.7, + "PIQA": 28.49, + "Quail": 4.1, + "RARbCode": 57.19, + "RARbMath": 72.26, + "SIQA": 4.26, + "SpartQA": 3.75, + "TempReasonL1": 1.5, + "TempReasonL2Fact": 35.91, + "TempReasonL2Pure": 1.89, + "TempReasonL3Fact": 27.51, + "TempReasonL3Pure": 8.53, + "WinoGrande": 58.01 } - ] - }, - "STS": { - "spearman": [ + ], + "recall_at_1": [ { - "Model": "bm25s" + "Model": "Cohere-embed-english-v3.0", + "BrightRetrieval (robotics)": 9.9, + "BrightRetrieval (psychology)": 20.5, + "BrightRetrieval (biology)": 31.47, + "BrightRetrieval (economics)": 17.96, + "BrightRetrieval (stackoverflow)": 15.81, + "BrightRetrieval (pony)": 0.84, + "BrightRetrieval (sustainable_living)": 15.23, + "BrightRetrieval (earth_science)": 35.49 } ] }, + "STS": { + "spearman": [] + }, "Summarization": { - "spearman": [ - { - "Model": "bm25s" - } - ] + "spearman": [] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "bm25s" + "Model": "Cohere-embed-english-v3.0", + "Core17InstructionRetrieval": 2.8, + "News21InstructionRetrieval": 0.2, + "Robust04InstructionRetrieval": -3.63 } ] }