LIBRA-Leaderboard / results /GLM4-9B-Chat.json
ai-forever's picture
Upload 19 files
1ddbee0 verified
raw
history blame
3.14 kB
{"passkey": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 1.0, "dataset_total_score": 1.0}, "matreshka_yes_no": {"4k": 0.7926421404682275, "8k": 0.75, "16k": 0.7133333333333334, "32k": 0.67, "64k": 0.5966666666666667, "128k": 0.56, "dataset_total_score": 0.6804403567447045}, "matreshka_names": {"4k": 0.6466666666666666, "8k": 0.5066666666666667, "16k": 0.52, "32k": 0.47333333333333333, "64k": 0.37333333333333335, "128k": 0.32, "dataset_total_score": 0.47333333333333333}, "passkey_with_librusec": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 1.0, "dataset_total_score": 1.0}, "librusec_history": {"8k": 0.84375, "16k": 0.84375, "32k": 0.84375, "64k": 0.75, "dataset_total_score": 0.8203125}, "ru_gsm100": {"16k": 0.08, "dataset_total_score": 0.08}, "ru_sci_passage_count": {"4k": 0.27, "8k": 0.08, "16k": 0.09, "32k": 0.0, "64k": 0.01, "128k": 0.0, "dataset_total_score": 0.07500000000000001}, "ru_2wikimultihopqa": {"8k": 0.5510204081632653, "16k": 0.5546875, "32k": 0.35772357723577236, "dataset_total_score": 0.4878104951330125}, "long_context_multiq": {"8k": 0.265, "16k": 0.035, "4k": 0.055, "64k": 0.005, "32k": 0.005, "128k": 0.1, "dataset_total_score": 0.0775}, "ru_sci_abstract_retrieval": {"4k": 0.9819047619047618, "8k": 0.923411865911866, "16k": 0.9122101461259002, "32k": 0.8189266620312142, "64k": 0.6411071734029656, "128k": 0.3908439729202464, "dataset_total_score": 0.7780674303828258}, "ru_trec": {"4k": 0.5675675675675675, "8k": 0.7, "16k": 0.7582417582417582, "32k": 0.7704918032786885, "dataset_total_score": 0.6990752822720037}, "ru_sci_fi": {"32k": 0.3888888888888889, "64k": 0.42857142857142855, "dataset_total_score": 0.4087301587301587}, "librusec_mhqa": {"8k": 0.4453125, "dataset_total_score": 0.4453125}, "ru_babilong_qa1": {"4k": 0.699375, "8k": 0.59, "16k": 0.6, "32k": 0.508125, "64k": 0.429375, "128k": 0.42, "dataset_total_score": 0.5411458333333333}, "ru_babilong_qa2": {"4k": 0.389375, "8k": 0.33, "16k": 0.299375, "32k": 0.2693333333333333, "64k": 0.2675, "128k": 0.23491666666666666, "dataset_total_score": 0.29841666666666666}, "ru_babilong_qa3": {"4k": 0.24598809523809526, "8k": 0.2792380952380953, "16k": 0.21408333333333335, "32k": 0.2264761904761905, "64k": 0.18666666666666668, "128k": 0.18545833333333334, "dataset_total_score": 0.22298511904761908}, "ru_babilong_qa4": {"4k": 0.6207142857142857, "8k": 0.5964285714285714, "16k": 0.5657142857142857, "32k": 0.58, "64k": 0.43, "128k": 0.37714285714285717, "dataset_total_score": 0.5283333333333334}, "ru_babilong_qa5": {"4k": 0.7300000000000001, "8k": 0.7350000000000001, "16k": 0.7200000000000002, "32k": 0.6683333333333334, "64k": 0.6966666666666668, "128k": 0.6700000000000002, "dataset_total_score": 0.7033333333333335}, "ru_quality": {"16k": 0.6521739130434783, "8k": 0.8292682926829268, "dataset_total_score": 0.7407211028632026}, "ru_tpo": {"8k": 0.8685258964143426, "dataset_total_score": 0.8685258964143426}, "ru_qasper": {"16k": 0.05927748156784547, "8k": 0.06532155413695329, "32k": 0.025813608477215297, "dataset_total_score": 0.050137548060671354}, "total_score": 0.5228181376023114}