Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/bleu/st.csv +3 -3
- results/llama3_70b_judge/ac.csv +3 -3
- results/llama3_70b_judge/aqa.csv +3 -3
- results/llama3_70b_judge/ar.csv +3 -3
- results/llama3_70b_judge/si.csv +2 -2
- results/llama3_70b_judge/sqa.csv +3 -3
- results/llama3_70b_judge_binary/er.csv +4 -4
- results/llama3_70b_judge_binary/gr.csv +3 -3
- results/llama3_70b_judge_binary/sqa.csv +3 -3
- results/meteor/ac.csv +6 -6
- results/wer/asr.csv +7 -7
- results/wer/cnasr.csv +2 -2
results/bleu/st.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
2 |
MERaLiON_AudioLLM_v1,32.62336432354048,37.983678588088225,8.496204861105712,37.072090507731545,15.012007824033613,3.9734959499000135
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
5 |
-
SALMONN_7B
|
6 |
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
7 |
-
|
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
2 |
MERaLiON_AudioLLM_v1,32.62336432354048,37.983678588088225,8.496204861105712,37.072090507731545,15.012007824033613,3.9734959499000135
|
3 |
+
Qwen-Audio-Chat,4.102230932924371,15.330641138043728,,0.45648619714728844,9.898238298955656,0.01699144301093184
|
4 |
wavllm_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
5 |
+
SALMONN_7B,14.102682915273142,33.88941292215531,0.00046745670226766583,26.89649039333571,5.296039450108202,0.3649023706010388
|
6 |
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,27.620150160643625,35.274306071307024,8.433062902024755,46.80524126004861,15.209998552437538,2.8327095799289337
|
results/llama3_70b_judge/ac.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
MERaLiON_AudioLLM_v1,35.04090909090909,29.028901734104046
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,5.5,6.901734104046243
|
5 |
-
SALMONN_7B
|
6 |
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
7 |
-
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
MERaLiON_AudioLLM_v1,35.04090909090909,29.028901734104046
|
3 |
+
Qwen-Audio-Chat,47.04090909090909,32.9364161849711
|
4 |
wavllm_fairseq,5.5,6.901734104046243
|
5 |
+
SALMONN_7B,37.445454545454545,23.76878612716763
|
6 |
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,3.0954545454545457,6.3468208092485545
|
results/llama3_70b_judge/aqa.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
2 |
MERaLiON_AudioLLM_v1,59.902771025765674,45.11182108626198,38.09210526315789
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,43.01199466903598,29.840255591054312,26.25
|
5 |
-
SALMONN_7B
|
6 |
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
7 |
-
|
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
2 |
MERaLiON_AudioLLM_v1,59.902771025765674,45.11182108626198,38.09210526315789
|
3 |
+
Qwen-Audio-Chat,61.934856587263,50.22364217252396,42.69736842105263
|
4 |
wavllm_fairseq,43.01199466903598,29.840255591054312,26.25
|
5 |
+
SALMONN_7B,57.75401069518716,50.287539936102235,47.30263157894737
|
6 |
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,24.647544968400585,18.466453674121407,18.88157894736842
|
results/llama3_70b_judge/ar.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,voxceleb_accent_test
|
2 |
MERaLiON_AudioLLM_v1,46.335658596635206
|
3 |
-
Qwen-Audio-Chat,
|
4 |
wavllm_fairseq,
|
5 |
-
SALMONN_7B,
|
6 |
Qwen2-Audio-7B-Instruct,29.187525646286417
|
7 |
-
|
|
|
1 |
Model,voxceleb_accent_test
|
2 |
MERaLiON_AudioLLM_v1,46.335658596635206
|
3 |
+
Qwen-Audio-Chat,48.05088223225277
|
4 |
wavllm_fairseq,
|
5 |
+
SALMONN_7B,34.222404595814524
|
6 |
Qwen2-Audio-7B-Instruct,29.187525646286417
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,24.640951990151827
|
results/llama3_70b_judge/si.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
2 |
MERaLiON_AudioLLM_v1,71.39999999999999,73.4
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,22.400000000000002,21.6
|
5 |
SALMONN_7B,15.8,17.2
|
6 |
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
7 |
-
|
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
2 |
MERaLiON_AudioLLM_v1,71.39999999999999,73.4
|
3 |
+
Qwen-Audio-Chat,10.600000000000001,9.8
|
4 |
wavllm_fairseq,22.400000000000002,21.6
|
5 |
SALMONN_7B,15.8,17.2
|
6 |
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,72.2,73.8
|
results/llama3_70b_judge/sqa.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_test
|
2 |
MERaLiON_AudioLLM_v1,82.94117647058825,60.31976744186046,70.32704167445337
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,83.92156862745098,58.54651162790698,77.64903756307233
|
5 |
-
SALMONN_7B,83.48039215686273,59.24418604651163,
|
6 |
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,64.86264249672958
|
7 |
-
|
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_test
|
2 |
MERaLiON_AudioLLM_v1,82.94117647058825,60.31976744186046,70.32704167445337
|
3 |
+
Qwen-Audio-Chat,79.36274509803921,63.16860465116279,64.8327415436367
|
4 |
wavllm_fairseq,83.92156862745098,58.54651162790698,77.64903756307233
|
5 |
+
SALMONN_7B,83.48039215686273,59.24418604651163,66.39506634273968
|
6 |
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,64.86264249672958
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,88.57843137254902,73.11046511627907,88.61894972902262
|
results/llama3_70b_judge_binary/er.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
MERaLiON_AudioLLM_v1,43.72509960159363,42.26053639846744,30.15325670498084
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,,50.076628352490424,41.0727969348659
|
5 |
-
SALMONN_7B,23.904382470119522
|
6 |
-
Qwen2-Audio-7B-Instruct
|
7 |
-
|
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
MERaLiON_AudioLLM_v1,43.72509960159363,42.26053639846744,30.15325670498084
|
3 |
+
Qwen-Audio-Chat,29.482071713147413,44.980842911877396,50.65134099616858
|
4 |
wavllm_fairseq,,50.076628352490424,41.0727969348659
|
5 |
+
SALMONN_7B,23.904382470119522,42.06896551724138,30.727969348659002
|
6 |
+
Qwen2-Audio-7B-Instruct,53.884462151394416,53.486590038314176,40.536398467432946
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,44.322709163346616,56.666666666666664,47.394636015325666
|
results/llama3_70b_judge_binary/gr.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
MERaLiON_AudioLLM_v1,99.52810832991383,96.81274900398407
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,69.67583093967994,
|
5 |
-
SALMONN_7B
|
6 |
Qwen2-Audio-7B-Instruct,99.1177677472302,98.40637450199203
|
7 |
-
|
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
MERaLiON_AudioLLM_v1,99.52810832991383,96.81274900398407
|
3 |
+
Qwen-Audio-Chat,70.5990972507181,50.298804780876495
|
4 |
wavllm_fairseq,69.67583093967994,
|
5 |
+
SALMONN_7B,88.81821912187115,85.95617529880478
|
6 |
Qwen2-Audio-7B-Instruct,99.1177677472302,98.40637450199203
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,35.248256052523594,15.737051792828685
|
results/llama3_70b_judge_binary/sqa.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
MERaLiON_AudioLLM_v1,85.02862175253192,79.09043387349712
|
3 |
-
Qwen-Audio-Chat
|
4 |
wavllm_fairseq,65.43372963452224,64.55828541557763
|
5 |
-
SALMONN_7B
|
6 |
Qwen2-Audio-7B-Instruct,74.50462351387054,66.70151594354418
|
7 |
-
|
|
|
1 |
Model,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
MERaLiON_AudioLLM_v1,85.02862175253192,79.09043387349712
|
3 |
+
Qwen-Audio-Chat,63.32012329370321,59.69681129116571
|
4 |
wavllm_fairseq,65.43372963452224,64.55828541557763
|
5 |
+
SALMONN_7B,50.902686041391455,56.821745948771564
|
6 |
Qwen2-Audio-7B-Instruct,74.50462351387054,66.70151594354418
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,91.85380889476001,89.33612127548353
|
results/meteor/ac.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
-
MERaLiON_AudioLLM_v1
|
3 |
-
Qwen-Audio-Chat
|
4 |
-
wavllm_fairseq
|
5 |
-
SALMONN_7B
|
6 |
-
Qwen2-Audio-7B-Instruct
|
7 |
-
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
+
MERaLiON_AudioLLM_v1,0.20729494848965185,0.27308927892270785
|
3 |
+
Qwen-Audio-Chat,0.27553015076950976,0.2355106805560457
|
4 |
+
wavllm_fairseq,0.041732965094428545,0.06399522524688675
|
5 |
+
SALMONN_7B,0.20994052484339956,0.17175112770658157
|
6 |
+
Qwen2-Audio-7B-Instruct,0.19891712076314283,0.21342294856199182
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.05796819723943051,0.120421856260385
|
results/wer/asr.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test
|
2 |
-
MERaLiON_AudioLLM_v1,0.0316520164484853,0.05489246599958386,0.0987204452413545,0.2529415939486267,0.16893672174464575,0.17487020711276138,0.20473616818080898,0.0931123806820198,0.12051054777521716
|
3 |
-
Qwen-Audio-Chat
|
4 |
-
wavllm_fairseq,0.02103218017882069,0.04798834811886432,0.14533325621300636,0.3792176325635977,0.15491778414546403,0.6447482518259942,0.6671766188447099,0.06621482559171073,0.4536784258110264
|
5 |
-
SALMONN_7B,0.10270871845172973,0.09671439650443565
|
6 |
-
Qwen2-Audio-7B-Instruct,0.035141660693401744,0.060415760304159495,0.11438872500819404,0.2165498391593041,0.11723812890302816,0.18872219319407232
|
7 |
-
|
|
|
1 |
+
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test
|
2 |
+
MERaLiON_AudioLLM_v1,0.0316520164484853,0.05489246599958386,0.0987204452413545,0.2529415939486267,0.16893672174464575,0.17487020711276138,0.20473616818080898,0.0931123806820198,0.12051054777521716
|
3 |
+
Qwen-Audio-Chat,0.020258799562379748,0.043467569561352074,0.11272421128398918,0.31419144746723354,0.13018910022587737,0.2655529121410546,0.3664994875132684,0.04052375714133636,0.2911540507002305
|
4 |
+
wavllm_fairseq,0.02103218017882069,0.04798834811886432,0.14533325621300636,0.3792176325635977,0.15491778414546403,0.6447482518259942,0.6671766188447099,0.06621482559171073,0.4536784258110264
|
5 |
+
SALMONN_7B,0.10270871845172973,0.09671439650443565,0.3062255383962828,0.23699946689025367,0.10765150204693537,0.2577708974886327,0.3597423676988383,0.0459884319222171,0.14231519234178336
|
6 |
+
Qwen2-Audio-7B-Instruct,0.035141660693401744,0.060415760304159495,0.11438872500819404,0.2165498391593041,0.11723812890302816,0.18872219319407232,0.23542555661330924,0.06114048472375004,0.08739585179932637
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.032349945297468596,0.05307658841999735,0.10600831614192711,0.20140159998943682,0.09948381629977261,0.11416493424197618,0.1448629161356777,0.04900464852205386,0.04396383619925545
|
results/wer/cnasr.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
Model,aishell_asr_zh_test
|
2 |
MERaLiON_AudioLLM_v1,0.18824612460652135
|
3 |
-
Qwen-Audio-Chat,
|
4 |
wavllm_fairseq,0.7054601967888183
|
5 |
SALMONN_7B,0.8259290055631446
|
6 |
Qwen2-Audio-7B-Instruct,0.09260359129694522
|
7 |
-
|
|
|
1 |
Model,aishell_asr_zh_test
|
2 |
MERaLiON_AudioLLM_v1,0.18824612460652135
|
3 |
+
Qwen-Audio-Chat,0.9469917443725129
|
4 |
wavllm_fairseq,0.7054601967888183
|
5 |
SALMONN_7B,0.8259290055631446
|
6 |
Qwen2-Audio-7B-Instruct,0.09260359129694522
|
7 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.20886539565639167
|