| { | |
| "ukusnews_short_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.06168908700151238, | |
| "Qwen-Audio-Chat": 0.10399586086125925, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.06877338215394412, | |
| "WavLLM_fairseq": 0.2066783411605508, | |
| "Qwen2-Audio-7B-Instruct": 0.1194380323171217, | |
| "SALMONN_7B": 0.09042426172092653, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.10144869855926132, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.0700867627159118 | |
| } | |
| }, | |
| "imda_part6_30s_asr_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.1698509342851144, | |
| "Qwen-Audio-Chat": 0.31394240863063033, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.1789273082575623, | |
| "WavLLM_fairseq": 0.42541061709652933, | |
| "Qwen2-Audio-7B-Instruct": 0.2245352799625317, | |
| "SALMONN_7B": 0.24872817713464365, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.11292172031202054, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.17467982364056267 | |
| } | |
| }, | |
| "covost2_en_id_test": { | |
| "bleu": { | |
| "whisper_large_v3": 1.600581653970121, | |
| "Qwen-Audio-Chat": 4.102230932924371, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 27.620150160643625, | |
| "WavLLM_fairseq": 13.841886973016162, | |
| "Qwen2-Audio-7B-Instruct": 16.325186897428104, | |
| "SALMONN_7B": 14.102682915273142, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 37.60224687716629, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 10.930203684508578 | |
| } | |
| }, | |
| "imda_part3_30s_asr_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.27026366524560785, | |
| "Qwen-Audio-Chat": 0.6412550574306894, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.3035544573275043, | |
| "WavLLM_fairseq": 0.7540934640345399, | |
| "Qwen2-Audio-7B-Instruct": 0.35076166942732234, | |
| "SALMONN_7B": 0.6569229098215983, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.2919053954978684, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.29992939962527493 | |
| } | |
| }, | |
| "gigaspeech_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.09459022434812692, | |
| "Qwen-Audio-Chat": 0.13018910022587737, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.09948381629977261, | |
| "WavLLM_fairseq": 0.15491778414546403, | |
| "Qwen2-Audio-7B-Instruct": 0.11723812890302816, | |
| "SALMONN_7B": 0.10765150204693537, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.14457154747310655, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.09515429104337297 | |
| } | |
| }, | |
| "covost2_ta_en_test": { | |
| "bleu": { | |
| "whisper_large_v3": 2.451098639578599, | |
| "Qwen-Audio-Chat": 0.01699144301093184, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 2.8327095799289337, | |
| "WavLLM_fairseq": 0.1695522548322915, | |
| "Qwen2-Audio-7B-Instruct": 0.04425838146050298, | |
| "SALMONN_7B": 0.3649023706010388, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 5.023057608950299, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 2.4245628096245917 | |
| } | |
| }, | |
| "librispeech_test_other": { | |
| "wer": { | |
| "whisper_large_v3": 0.03660128246354058, | |
| "Qwen-Audio-Chat": 0.043467569561352074, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.05307658841999735, | |
| "WavLLM_fairseq": 0.04798834811886432, | |
| "Qwen2-Audio-7B-Instruct": 0.060415760304159495, | |
| "SALMONN_7B": 0.09671439650443565, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.041576030415949455, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.03714982881570734 | |
| } | |
| }, | |
| "parliament_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.0753619074652285, | |
| "Qwen-Audio-Chat": 0.26279685873781816, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.06282524363705176, | |
| "WavLLM_fairseq": 0.5216434856656259, | |
| "Qwen2-Audio-7B-Instruct": 0.23270886555019396, | |
| "SALMONN_7B": 0.3010928186204939, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.058922319992430694, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.07517267480367111 | |
| } | |
| }, | |
| "earnings22_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.15887899737116104, | |
| "Qwen-Audio-Chat": 0.3664994875132684, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.1448629161356777, | |
| "WavLLM_fairseq": 0.6671766188447099, | |
| "Qwen2-Audio-7B-Instruct": 0.23542555661330924, | |
| "SALMONN_7B": 0.3597423676988383, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.1652245056860175, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.15611126487402763 | |
| } | |
| }, | |
| "imda_part2_asr_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.3171008846684522, | |
| "Qwen-Audio-Chat": 0.45479263046830615, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.32988393799204613, | |
| "WavLLM_fairseq": 0.4463923382842302, | |
| "Qwen2-Audio-7B-Instruct": 0.1905689473257041, | |
| "SALMONN_7B": 0.42346400454508565, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.048088629169710254, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.31912994075156237 | |
| } | |
| }, | |
| "ukusnews_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.07135564378899603, | |
| "Qwen-Audio-Chat": 0.3158631121194933, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.07388920400831915, | |
| "WavLLM_fairseq": 0.5911892607298166, | |
| "Qwen2-Audio-7B-Instruct": 0.13843826810361126, | |
| "SALMONN_7B": 0.18918510115333712, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.12554358101720553, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.07642276422764227 | |
| } | |
| }, | |
| "earnings21_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.11863959266711877, | |
| "Qwen-Audio-Chat": 0.2655529121410546, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.11416493424197618, | |
| "WavLLM_fairseq": 0.6447482518259942, | |
| "Qwen2-Audio-7B-Instruct": 0.18872219319407232, | |
| "SALMONN_7B": 0.2577708974886327, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.13488732754499672, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.11773910240019567 | |
| } | |
| }, | |
| "covost2_zh_en_test": { | |
| "bleu": { | |
| "whisper_large_v3": 14.673689493155793, | |
| "Qwen-Audio-Chat": 9.898238298955656, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 15.209998552437538, | |
| "WavLLM_fairseq": 2.368659001743569, | |
| "Qwen2-Audio-7B-Instruct": 16.466557744958333, | |
| "SALMONN_7B": 5.296039450108202, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 18.76473995941838, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 14.154700735606419 | |
| } | |
| }, | |
| "covost2_en_ta_test": { | |
| "bleu": { | |
| "whisper_large_v3": 0.02107778621423822, | |
| "Qwen-Audio-Chat": 0.03451483807236294, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 8.433062902024755, | |
| "WavLLM_fairseq": 0.0033159224040994286, | |
| "Qwen2-Audio-7B-Instruct": 0.03245972071872916, | |
| "SALMONN_7B": 0.00046745670226766583, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 14.407399367512914, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 1.0368044741318085 | |
| } | |
| }, | |
| "librispeech_test_clean": { | |
| "wer": { | |
| "whisper_large_v3": 0.01878749009695552, | |
| "Qwen-Audio-Chat": 0.020258799562379748, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.032349945297468596, | |
| "WavLLM_fairseq": 0.02103218017882069, | |
| "Qwen2-Audio-7B-Instruct": 0.035141660693401744, | |
| "SALMONN_7B": 0.10270871845172973, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.022918474365262006, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.018334779492209605 | |
| } | |
| }, | |
| "tedlium3_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.037649480146197796, | |
| "Qwen-Audio-Chat": 0.04052375714133636, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.04900464852205386, | |
| "WavLLM_fairseq": 0.06621482559171073, | |
| "Qwen2-Audio-7B-Instruct": 0.06114048472375004, | |
| "SALMONN_7B": 0.0459884319222171, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.07884745040985061, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.038146268762641496 | |
| } | |
| }, | |
| "imda_part1_asr_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.06844171360300393, | |
| "Qwen-Audio-Chat": 0.10550313315290274, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.07041669714480775, | |
| "WavLLM_fairseq": 0.10077292565771828, | |
| "Qwen2-Audio-7B-Instruct": 0.07197717796796138, | |
| "SALMONN_7B": 0.0925804013361617, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.042254894789457, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.06922195401458074 | |
| } | |
| }, | |
| "common_voice_15_en_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.10001863741235596, | |
| "Qwen-Audio-Chat": 0.11272421128398918, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.10600831614192711, | |
| "WavLLM_fairseq": 0.14533325621300636, | |
| "Qwen2-Audio-7B-Instruct": 0.11438872500819404, | |
| "SALMONN_7B": 0.3062255383962828, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.07811646454714301, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.09876543209876543 | |
| } | |
| }, | |
| "mediacorp_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.12054884024828487, | |
| "Qwen-Audio-Chat": 0.4498529892192094, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.12455080039202875, | |
| "WavLLM_fairseq": 0.3595230316889905, | |
| "Qwen2-Audio-7B-Instruct": 0.18694870957203527, | |
| "SALMONN_7B": 0.32089186540346293, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.170859196341065, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.13598497223129696 | |
| } | |
| }, | |
| "idpc_short_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.1662526275558953, | |
| "Qwen-Audio-Chat": 0.6008025988916491, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.16931014714313014, | |
| "WavLLM_fairseq": 0.36728454041658704, | |
| "Qwen2-Audio-7B-Instruct": 0.21326199120963119, | |
| "SALMONN_7B": 0.26313777947639977, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.24918784635964075, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.15803554366520162 | |
| } | |
| }, | |
| "seame_dev_man": { | |
| "wer": { | |
| "whisper_large_v3": 0.7225930420711975, | |
| "Qwen-Audio-Chat": 0.8783373786407767, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.7824973031283711, | |
| "WavLLM_fairseq": 1.2913969795037756, | |
| "Qwen2-Audio-7B-Instruct": 0.5522518878101402, | |
| "SALMONN_7B": 1.2721817691477886, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.388282092772384, | |
| "gemini-1.5-flash": 0.9690871089536138, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.6848705501618123 | |
| } | |
| }, | |
| "cna_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.13841717398269784, | |
| "Qwen-Audio-Chat": 0.19753284203780838, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.15171419416853574, | |
| "WavLLM_fairseq": 0.26946491509131687, | |
| "Qwen2-Audio-7B-Instruct": 0.2067713339741536, | |
| "SALMONN_7B": 0.15395706504325538, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.15924383210509452, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.13798996048275125 | |
| } | |
| }, | |
| "ytb_asr_batch1": { | |
| "wer": { | |
| "whisper_large_v3": 0.12226319428439733, | |
| "Qwen-Audio-Chat": 0.2297764461857571, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.1400092187139894, | |
| "WavLLM_fairseq": 0.41876008296842593, | |
| "Qwen2-Audio-7B-Instruct": 0.16843358684796805, | |
| "SALMONN_7B": 0.21487285856956287, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.11484981178458939, | |
| "gemini-1.5-flash": 0.1089344703080587, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.12579703464700007 | |
| } | |
| }, | |
| "mediacorp_short_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.11715763436024286, | |
| "Qwen-Audio-Chat": 0.2548909377108163, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.14571621317742298, | |
| "WavLLM_fairseq": 0.2621992354396222, | |
| "Qwen2-Audio-7B-Instruct": 0.17180121430177647, | |
| "SALMONN_7B": 0.1751742747919946, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.13301101866426804, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.11434675061839443 | |
| } | |
| }, | |
| "peoples_speech_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.14602420615337386, | |
| "Qwen-Audio-Chat": 0.31419144746723354, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.20140159998943682, | |
| "WavLLM_fairseq": 0.3792176325635977, | |
| "Qwen2-Audio-7B-Instruct": 0.2165498391593041, | |
| "SALMONN_7B": 0.23699946689025367, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.21050407754683692, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.14540692118393275 | |
| } | |
| }, | |
| "covost2_en_zh_test": { | |
| "bleu": { | |
| "whisper_large_v3": 0.16408986541757878, | |
| "Qwen-Audio-Chat": 15.330641138043728, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 35.274306071307024, | |
| "WavLLM_fairseq": 31.96381187282953, | |
| "Qwen2-Audio-7B-Instruct": 25.765420247070075, | |
| "SALMONN_7B": 33.88941292215531, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 43.941098854450516, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054 | |
| } | |
| }, | |
| "tedlium3_long_form_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.03208650948413402, | |
| "Qwen-Audio-Chat": 0.2911540507002305, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.04396383619925545, | |
| "WavLLM_fairseq": 0.4536784258110264, | |
| "Qwen2-Audio-7B-Instruct": 0.08739585179932637, | |
| "SALMONN_7B": 0.14231519234178336, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.10228682857649353, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.04754476156709803 | |
| } | |
| }, | |
| "seame_dev_sge": { | |
| "wer": { | |
| "whisper_large_v3": 0.5377268970583734, | |
| "Qwen-Audio-Chat": 1.05567969634822, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5840399155162387, | |
| "WavLLM_fairseq": 1.2204842511249197, | |
| "Qwen2-Audio-7B-Instruct": 0.5486546879304539, | |
| "SALMONN_7B": 1.0189782362484312, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.35550521901496834, | |
| "gemini-1.5-flash": 1.1100431601824359, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.507882090054792 | |
| } | |
| }, | |
| "aishell_asr_zh_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.12359684029221357, | |
| "Qwen-Audio-Chat": 0.9469917443725129, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.20886539565639167, | |
| "WavLLM_fairseq": 0.7054601967888183, | |
| "Qwen2-Audio-7B-Instruct": 0.09260359129694522, | |
| "SALMONN_7B": 0.8259290055631446, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.13165449110094832, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.12450753301261111 | |
| } | |
| }, | |
| "covost2_id_en_test": { | |
| "bleu": { | |
| "whisper_large_v3": 46.01512198258627, | |
| "Qwen-Audio-Chat": 0.45648619714728844, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 46.80524126004861, | |
| "WavLLM_fairseq": 5.933522277713613, | |
| "Qwen2-Audio-7B-Instruct": 6.326113431899141, | |
| "SALMONN_7B": 26.89649039333571, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 44.43289180618449, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 46.79924664837527 | |
| } | |
| }, | |
| "ytb_asr_batch2": { | |
| "wer": { | |
| "whisper_large_v3": 0.17210509244242622, | |
| "Qwen-Audio-Chat": 0.4315277327278625, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.2192622950819672, | |
| "WavLLM_fairseq": 0.48091685587631094, | |
| "Qwen2-Audio-7B-Instruct": 0.2080008649583739, | |
| "SALMONN_7B": 0.3238620391393664, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.15162720294085846, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.23561466104443723 | |
| } | |
| }, | |
| "imda_part5_30s_asr_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.2143555471246589, | |
| "Qwen-Audio-Chat": 0.3016882870525747, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.22881615619208825, | |
| "WavLLM_fairseq": 0.39796588405247263, | |
| "Qwen2-Audio-7B-Instruct": 0.27856006770658537, | |
| "SALMONN_7B": 0.34868891450584405, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.17694182194919086, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.22004640235805695 | |
| } | |
| }, | |
| "parliament_short_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.05543951935226013, | |
| "Qwen-Audio-Chat": 0.09347360821020603, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.07325752301384698, | |
| "WavLLM_fairseq": 0.09512390087929656, | |
| "Qwen2-Audio-7B-Instruct": 0.08416492612361723, | |
| "SALMONN_7B": 0.08676929424202573, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.056935097083623425, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.05742502771975968 | |
| } | |
| }, | |
| "idpc_test": { | |
| "wer": { | |
| "whisper_large_v3": 0.19880239520958085, | |
| "Qwen-Audio-Chat": 0.7710863986313088, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.16766467065868262, | |
| "WavLLM_fairseq": 0.7686911890504705, | |
| "Qwen2-Audio-7B-Instruct": 0.19093242087254064, | |
| "SALMONN_7B": 0.4550898203592814, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.30008554319931563, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.17741659538066723 | |
| } | |
| }, | |
| "imda_part3_30s_ds_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 16.4, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 45.4, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 31.6, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 33.8, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 9.0, | |
| "success_rate": 0.99 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 48.4, | |
| "success_rate": 0.99 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 37.400000000000006, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 47.400000000000006, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "cn_college_listen_mcq_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 63.232056362835756, | |
| "success_rate": 0.9995596653456627 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 91.85380889476001, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 66.31439894319684, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 74.7247908410392, | |
| "success_rate": 0.9995596653456627 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 50.99075297225891, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 88.50726552179657, | |
| "success_rate": 1.0 | |
| }, | |
| "gemini-1.5-flash": { | |
| "judge_score": 89.25583443416997, | |
| "success_rate": 0.9991193306913254 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 85.2928225451343, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part3_30s_sqa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 51.08, | |
| "success_rate": 0.998 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 70.17999999999999, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 60.620000000000005, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 50.8, | |
| "success_rate": 0.999 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 70.28, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 73.0, | |
| "success_rate": 0.999 | |
| } | |
| } | |
| }, | |
| "openhermes_audio_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 10.600000000000001, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 72.2, | |
| "success_rate": 0.96 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 19.2, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 44.800000000000004, | |
| "success_rate": 0.96 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 15.8, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 65.6, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 63.0, | |
| "success_rate": 0.93 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 75.0, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part5_30s_sqa_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 47.800000000000004, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 74.0, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 50.8, | |
| "success_rate": 0.99 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 51.6, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 44.6, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 64.80000000000001, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 57.800000000000004, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 64.80000000000001, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "slue_p2_sqa5_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 79.36274509803921, | |
| "success_rate": 0.9975490196078431 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 88.57843137254902, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 83.92156862745098, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 80.04901960784315, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 83.48039215686273, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 86.76470588235293, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 82.99019607843137, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 87.79411764705883, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "ytb_sds_batch1": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 43.878954607977995, | |
| "success_rate": 0.9917469050894085 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 64.12654745529574, | |
| "success_rate": 0.9986244841815681 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 55.625859697386524, | |
| "success_rate": 0.9917469050894085 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 51.5818431911967, | |
| "success_rate": 0.9986244841815681 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 31.279229711141674, | |
| "success_rate": 0.9972489683631361 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 53.97524071526823, | |
| "success_rate": 0.9944979367262724 | |
| }, | |
| "gemini-1.5-flash": { | |
| "judge_score": 65.9697386519945, | |
| "success_rate": 0.9931224209078404 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 59.44979367262724, | |
| "success_rate": 0.9972489683631361 | |
| } | |
| } | |
| }, | |
| "voxceleb_gender_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 70.5990972507181, | |
| "success_rate": 0.9997948297086582 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 34.94050061551087, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 69.61427985227739, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 99.1177677472302, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 88.79770209273697, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 99.75379565038982, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 42.921624948707425, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "dream_tts_mcq_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 59.749085206481965, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 89.33612127548353, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 66.5446941975954, | |
| "success_rate": 0.9984317825405122 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 66.49242028227914, | |
| "success_rate": 0.9994772608468374 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 56.455828541557764, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 84.31782540512285, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 86.4610559330894, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "ytb_sqa_batch1": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 60.827586206896555, | |
| "success_rate": 0.9980295566502463 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 70.18719211822659, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 60.70935960591133, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 60.453201970443345, | |
| "success_rate": 0.9980295566502463 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 55.665024630541865, | |
| "success_rate": 0.9990147783251232 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 64.51231527093596, | |
| "success_rate": 0.9980295566502463 | |
| }, | |
| "gemini-1.5-flash": { | |
| "judge_score": 78.06896551724138, | |
| "success_rate": 0.9980295566502463 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 67.3103448275862, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "spoken_squad_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 64.8327415436367, | |
| "success_rate": 0.9990655952158475 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 88.61894972902262, | |
| "success_rate": 0.9998131190431695 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 77.64903756307233, | |
| "success_rate": 0.997383666604373 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 64.86264249672958, | |
| "success_rate": 0.9971967856475425 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 66.39506634273968, | |
| "success_rate": 0.9994393571295085 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 73.66473556344609, | |
| "success_rate": 0.999252476172678 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 83.81984675761541, | |
| "success_rate": 0.998131190431695 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 90.12521024107643, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part4_30s_sqa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 41.92, | |
| "success_rate": 0.999 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 66.34, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 50.279999999999994, | |
| "success_rate": 0.999 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 61.980000000000004, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 64.9, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_gr_dialogue": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 37.2, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 19.6, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 46.766666666666666, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 61.56666666666667, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 42.733333333333334, | |
| "success_rate": 0.9993333333333333 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 93.76666666666667, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 25.433333333333337, | |
| "success_rate": 0.9996666666666667 | |
| } | |
| } | |
| }, | |
| "imda_ar_dialogue": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 0.6666666666666667, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 7.633333333333334, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 0.23333333333333336, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 0.9666666666666667, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 0.06666666666666667, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 77.83333333333333, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 9.666666666666666, | |
| "success_rate": 0.9986666666666667 | |
| } | |
| } | |
| }, | |
| "audiocaps_test": { | |
| "meteor": { | |
| "Qwen-Audio-Chat": 0.27553015076950976, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.05796819723943051, | |
| "WavLLM_fairseq": 0.041732965094428545, | |
| "Qwen2-Audio-7B-Instruct": 0.19891712076314283, | |
| "SALMONN_7B": 0.20994052484339956, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.24920047034353812, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.07953048457785493 | |
| }, | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 47.04090909090909, | |
| "success_rate": 0.9990909090909091 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 3.0954545454545457, | |
| "success_rate": 0.9995454545454545 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 5.5, | |
| "success_rate": 0.9977272727272727 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 40.77727272727273, | |
| "success_rate": 0.9977272727272727 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 37.445454545454545, | |
| "success_rate": 0.9988636363636364 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 38.00454545454545, | |
| "success_rate": 0.9997727272727273 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 2.4727272727272727, | |
| "success_rate": 0.9997727272727273 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 4.868181818181818, | |
| "success_rate": 0.9981818181818182 | |
| } | |
| } | |
| }, | |
| "imda_part5_30s_ds_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 39.14, | |
| "success_rate": 0.996 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 61.48, | |
| "success_rate": 0.996 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 45.38, | |
| "success_rate": 0.997 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 24.340000000000003, | |
| "success_rate": 0.998 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 54.379999999999995, | |
| "success_rate": 0.998 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 63.68000000000001, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "ytb_pqa_batch1": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 37.16117216117216, | |
| "success_rate": 0.9990842490842491 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 55.01831501831502, | |
| "success_rate": 0.9990842490842491 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 40.95238095238095, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 36.97802197802198, | |
| "success_rate": 0.9981684981684982 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 32.124542124542124, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 40.97069597069597, | |
| "success_rate": 0.9990842490842491 | |
| }, | |
| "gemini-1.5-flash": { | |
| "judge_score": 49.908424908424905, | |
| "success_rate": 0.9972527472527473 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 52.252747252747255, | |
| "success_rate": 0.9990842490842491 | |
| } | |
| } | |
| }, | |
| "imda_ar_sentence": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 3.933333333333333, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 26.016666666666666, | |
| "success_rate": 0.9998333333333334 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 2.6833333333333336, | |
| "success_rate": 0.999 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 2.55, | |
| "success_rate": 0.9998333333333334 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 2.5166666666666666, | |
| "success_rate": 0.999 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 7.816666666666666, | |
| "success_rate": 0.9995 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 12.416666666666666, | |
| "success_rate": 0.9995 | |
| } | |
| } | |
| }, | |
| "imda_part6_30s_sqa_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 51.4, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 71.6, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 62.199999999999996, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 53.6, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 46.8, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 67.2, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 64.0, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 67.0, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_gr_sentence": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 57.550000000000004, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 26.35, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 49.06666666666666, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 68.38333333333333, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 59.766666666666666, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 66.13333333333333, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 36.016666666666666, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part4_30s_ds_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 18.060000000000002, | |
| "success_rate": 0.994 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 43.4, | |
| "success_rate": 0.999 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 25.019999999999996, | |
| "success_rate": 0.998 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 9.399999999999999, | |
| "success_rate": 0.999 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 37.879999999999995, | |
| "success_rate": 0.993 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 47.74, | |
| "success_rate": 0.999 | |
| } | |
| } | |
| }, | |
| "meld_emotion_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 50.72796934865901, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 47.356321839080465, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 41.57088122605364, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 41.60919540229885, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 30.536398467432953, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 36.36015325670498, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 36.81992337164751, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "muchomusic_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 59.0564448188711, | |
| "success_rate": 0.9991575400168492 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 51.727042965459134, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 44.3133951137321, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 71.60909856781802, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 50.88458298230834, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 57.7927548441449, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 56.44481887110362, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part6_30s_ds_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 43.84, | |
| "success_rate": 0.993 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 65.6, | |
| "success_rate": 0.996 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 48.38, | |
| "success_rate": 0.999 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 27.12, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 59.2, | |
| "success_rate": 0.999 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 67.58, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "clotho_aqa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 61.934856587263, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 24.647544968400585, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 43.01199466903598, | |
| "success_rate": 0.998223011994669 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 50.919591292758774, | |
| "success_rate": 0.9991115059973346 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 57.75401069518716, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 63.15021876519203, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 29.47134606841404, | |
| "success_rate": 0.9991115059973346 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 28.076410484229232, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part3_30s_sqa_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 32.2, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 56.0, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 45.199999999999996, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 42.0, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 40.599999999999994, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 51.4, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 49.0, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 52.800000000000004, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part6_30s_sqa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 63.040000000000006, | |
| "success_rate": 0.998 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 83.08, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 69.42, | |
| "success_rate": 0.998 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 66.86, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 80.60000000000001, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 81.8, | |
| "success_rate": 0.999 | |
| } | |
| } | |
| }, | |
| "imda_30s_ds_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 31.295, | |
| "success_rate": 0.99625 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 54.515, | |
| "success_rate": 0.99575 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 38.915, | |
| "success_rate": 0.99775 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 18.345, | |
| "success_rate": 0.999 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 48.269999999999996, | |
| "success_rate": 0.998 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 57.99, | |
| "success_rate": 0.99975 | |
| } | |
| } | |
| }, | |
| "iemocap_emotion_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 29.382470119521916, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 44.322709163346616, | |
| "success_rate": 0.99800796812749 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 59.76095617529881, | |
| "success_rate": 0.999003984063745 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 53.98406374501992, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 23.804780876494025, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 48.505976095617534, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 46.713147410358566, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part6_30s_ds_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 40.4, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 65.4, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 49.400000000000006, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 46.2, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 24.2, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 62.599999999999994, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 57.199999999999996, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 64.4, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_30s_sqa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 54.669999999999995, | |
| "success_rate": 0.99875 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 75.09, | |
| "success_rate": 0.99875 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 62.190000000000005, | |
| "success_rate": 0.99925 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 72.475, | |
| "success_rate": 0.99925 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 75.11999999999999, | |
| "success_rate": 0.9995 | |
| } | |
| } | |
| }, | |
| "wavcaps_qa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 42.69736842105263, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 18.88157894736842, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 26.25, | |
| "success_rate": 0.9967105263157895 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 44.473684210526315, | |
| "success_rate": 0.9967105263157895 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 47.30263157894737, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 46.31578947368421, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 16.710526315789473, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 14.736842105263158, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "wavcaps_test": { | |
| "meteor": { | |
| "Qwen-Audio-Chat": 0.2355106805560457, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.120421856260385, | |
| "WavLLM_fairseq": 0.06399522524688675, | |
| "Qwen2-Audio-7B-Instruct": 0.21342294856199182, | |
| "SALMONN_7B": 0.17175112770658157, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.3175511907248581, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.1388630786594543 | |
| }, | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 32.9364161849711, | |
| "success_rate": 0.999421965317919 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 6.3468208092485545, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 6.901734104046243, | |
| "success_rate": 0.9976878612716763 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 33.78034682080925, | |
| "success_rate": 0.9976878612716763 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 23.76878612716763, | |
| "success_rate": 0.999421965317919 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 33.97687861271676, | |
| "success_rate": 0.999421965317919 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 3.445086705202312, | |
| "success_rate": 0.9988439306358381 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 4.61271676300578, | |
| "success_rate": 0.999421965317919 | |
| } | |
| } | |
| }, | |
| "imda_part3_30s_ds_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 25.22, | |
| "success_rate": 0.997 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 48.339999999999996, | |
| "success_rate": 0.998 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 36.5, | |
| "success_rate": 0.997 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 35.54, | |
| "success_rate": 0.996 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 12.82, | |
| "success_rate": 0.998 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 42.32, | |
| "success_rate": 0.998 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 52.38, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "meld_sentiment_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 44.90421455938697, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 56.59003831417625, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 51.072796934865906, | |
| "success_rate": 0.9996168582375479 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 53.9463601532567, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 41.7624521072797, | |
| "success_rate": 0.9996168582375479 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 46.206896551724135, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 45.593869731800766, | |
| "success_rate": 0.9996168582375479 | |
| } | |
| } | |
| }, | |
| "imda_part5_30s_ds_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 28.2, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 58.0, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 45.199999999999996, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 40.4, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 17.2, | |
| "success_rate": 0.99 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 57.0, | |
| "success_rate": 0.99 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 49.0, | |
| "success_rate": 0.99 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 56.8, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part5_30s_sqa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 61.260000000000005, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 80.34, | |
| "success_rate": 0.999 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 68.52000000000001, | |
| "success_rate": 0.999 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 62.62, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 76.56, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 80.36, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "voxceleb_accent_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 48.05088223225277, | |
| "success_rate": 0.9995896594173164 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 24.640951990151827, | |
| "success_rate": 0.9997948297086582 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 39.96717275338531, | |
| "success_rate": 0.9993844891259746 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 29.187525646286417, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 34.222404595814524, | |
| "success_rate": 0.9993844891259746 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 47.01682396389003, | |
| "success_rate": 0.9997948297086582 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 39.32704144439885, | |
| "success_rate": 0.9993844891259746 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 39.462453836684446, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "audiocaps_qa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 50.22364217252396, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 18.466453674121407, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 29.840255591054312, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 45.75079872204473, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 50.287539936102235, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 49.77635782747604, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 17.380191693290733, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 14.63258785942492, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "public_sg_speech_qa_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 63.16860465116279, | |
| "success_rate": 0.9941860465116279 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 73.11046511627907, | |
| "success_rate": 0.998546511627907 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 58.54651162790698, | |
| "success_rate": 0.9825581395348837 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 58.31395348837209, | |
| "success_rate": 0.9927325581395349 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 59.24418604651163, | |
| "success_rate": 0.997093023255814 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 59.7093023255814, | |
| "success_rate": 0.997093023255814 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 64.94186046511628, | |
| "success_rate": 0.9927325581395349 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 73.02325581395348, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_30s_ds_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 30.65, | |
| "success_rate": 0.995 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 50.15, | |
| "success_rate": 0.9975 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 37.599999999999994, | |
| "success_rate": 0.995 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 16.15, | |
| "success_rate": 0.9975 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 43.849999999999994, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 54.65, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "alpaca_audio_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 9.8, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 73.8, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 21.6, | |
| "success_rate": 0.99 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 52.599999999999994, | |
| "success_rate": 0.99 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 17.2, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 74.80000000000001, | |
| "success_rate": 0.99 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 70.8, | |
| "success_rate": 0.96 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 77.8, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_30s_sqa_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 42.199999999999996, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 62.95, | |
| "success_rate": 0.9975 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 47.1, | |
| "success_rate": 0.995 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 42.300000000000004, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 55.7, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 61.550000000000004, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part4_30s_ds_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 16.0, | |
| "success_rate": 0.99 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 44.0, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 31.6, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 24.8, | |
| "success_rate": 0.97 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 7.0, | |
| "success_rate": 0.99 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 46.4, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 36.0, | |
| "success_rate": 0.99 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 48.2, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_part4_30s_sqa_human_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 37.8, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 66.0, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 46.6, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 39.6, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 36.6, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 53.2, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 53.8, | |
| "success_rate": 1.0 | |
| } | |
| }, | |
| "gpt4o_judge": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 61.4, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "iemocap_gender_test": { | |
| "llama3_70b_judge": { | |
| "Qwen-Audio-Chat": { | |
| "judge_score": 50.0996015936255, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 15.737051792828685, | |
| "success_rate": 1.0 | |
| }, | |
| "WavLLM_fairseq": { | |
| "judge_score": 51.932270916334666, | |
| "success_rate": 1.0 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 92.80876494023903, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 81.31474103585658, | |
| "success_rate": 1.0 | |
| }, | |
| "MERaLiON-AudioLLM-Whisper-SEA-LION": { | |
| "judge_score": 93.48605577689243, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 44.22310756972111, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_30s_gr_test": { | |
| "llama3_70b_judge": { | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 18.46666666666667, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "imda_30s_ar_test": { | |
| "llama3_70b_judge": { | |
| "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": { | |
| "judge_score": 15.773333333333333, | |
| "success_rate": 0.9996666666666667 | |
| }, | |
| "Qwen2-Audio-7B-Instruct": { | |
| "judge_score": 5.106666666666667, | |
| "success_rate": 1.0 | |
| }, | |
| "SALMONN_7B": { | |
| "judge_score": 5.673333333333334, | |
| "success_rate": 1.0 | |
| }, | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": { | |
| "judge_score": 27.186666666666667, | |
| "success_rate": 0.9996666666666667 | |
| } | |
| } | |
| }, | |
| "mmau_mini": { | |
| "llama3_70b_judge": { | |
| "phi_4_multimodal_instruct": { | |
| "judge_score": 59.4, | |
| "success_rate": 1.0 | |
| } | |
| } | |
| }, | |
| "nlb_asr_test": { | |
| "wer": { | |
| "cascade_whisper_large_v3_llama_3_8b_instruct": 0.2796380263880551 | |
| } | |
| } | |
| } |