Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/bleu/st.csv +0 -4
- results/llama3_70b_judge/ac.csv +0 -4
- results/llama3_70b_judge/aqa.csv +0 -4
- results/llama3_70b_judge/ar.csv +0 -4
- results/llama3_70b_judge/si.csv +0 -4
- results/llama3_70b_judge/sqa.csv +0 -4
- results/llama3_70b_judge_binary/er.csv +0 -4
- results/llama3_70b_judge_binary/gr.csv +0 -4
- results/llama3_70b_judge_binary/sqa.csv +0 -4
- results/meteor/ac.csv +0 -4
- results/wer/asr.csv +0 -4
- results/wer/cnasr.csv +0 -4
results/bleu/st.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
2 |
-
AudioGemma2_v1,,,,,,
|
3 |
-
test_temp,,,,,,
|
4 |
-
audiogemma_2_singlish,,,,,,
|
5 |
salmonn_7b,14.135708265627702,34.055681569415356,0.00045042243733421275,27.877196356747618,5.2319724257915645,0.4256092994569619
|
6 |
wavllm_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
7 |
-
old_models,,,,,,
|
8 |
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,,,,,,
|
10 |
mowe_audio,,,,,,
|
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
|
|
|
|
|
|
2 |
salmonn_7b,14.135708265627702,34.055681569415356,0.00045042243733421275,27.877196356747618,5.2319724257915645,0.4256092994569619
|
3 |
wavllm_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
|
|
4 |
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,,,,,,
|
6 |
mowe_audio,,,,,,
|
results/llama3_70b_judge/ac.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
-
AudioGemma2_v1,,
|
3 |
-
test_temp,,
|
4 |
-
audiogemma_2_singlish,,
|
5 |
salmonn_7b,34.372727272727275,21.15606936416185
|
6 |
wavllm_fairseq,5.5,6.901734104046243
|
7 |
-
old_models,,
|
8 |
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,2.4727272727272727,3.445086705202312
|
10 |
mowe_audio,41.20909090909091,23.294797687861273
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
|
|
|
|
|
|
2 |
salmonn_7b,34.372727272727275,21.15606936416185
|
3 |
wavllm_fairseq,5.5,6.901734104046243
|
|
|
4 |
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,2.4727272727272727,3.445086705202312
|
6 |
mowe_audio,41.20909090909091,23.294797687861273
|
results/llama3_70b_judge/aqa.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
2 |
-
AudioGemma2_v1,,,
|
3 |
-
test_temp,,,
|
4 |
-
audiogemma_2_singlish,,,
|
5 |
salmonn_7b,55.75299866725899,47.02875399361022,46.25
|
6 |
wavllm_fairseq,43.01199466903598,29.840255591054312,26.25
|
7 |
-
old_models,,,
|
8 |
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,29.47134606841404,17.380191693290733,16.710526315789473
|
10 |
mowe_audio,62.221235006663704,32.97124600638978,28.88157894736842
|
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
|
|
|
|
|
|
2 |
salmonn_7b,55.75299866725899,47.02875399361022,46.25
|
3 |
wavllm_fairseq,43.01199466903598,29.840255591054312,26.25
|
|
|
4 |
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,29.47134606841404,17.380191693290733,16.710526315789473
|
6 |
mowe_audio,62.221235006663704,32.97124600638978,28.88157894736842
|
results/llama3_70b_judge/ar.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,voxceleb_accent_test
|
2 |
-
AudioGemma2_v1,
|
3 |
-
test_temp,
|
4 |
-
audiogemma_2_singlish,
|
5 |
salmonn_7b,37.443578169881
|
6 |
wavllm_fairseq,
|
7 |
-
old_models,
|
8 |
Qwen2-Audio-7B-Instruct,29.187525646286417
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,39.32704144439885
|
10 |
mowe_audio,23.68485843249897
|
|
|
1 |
Model,voxceleb_accent_test
|
|
|
|
|
|
|
2 |
salmonn_7b,37.443578169881
|
3 |
wavllm_fairseq,
|
|
|
4 |
Qwen2-Audio-7B-Instruct,29.187525646286417
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,39.32704144439885
|
6 |
mowe_audio,23.68485843249897
|
results/llama3_70b_judge/si.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
2 |
-
AudioGemma2_v1,,
|
3 |
-
test_temp,,
|
4 |
-
audiogemma_2_singlish,,
|
5 |
salmonn_7b,19.2,12.4
|
6 |
wavllm_fairseq,22.400000000000002,21.6
|
7 |
-
old_models,,
|
8 |
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,63.0,70.8
|
10 |
mowe_audio,16.0,19.8
|
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
|
|
|
|
|
|
2 |
salmonn_7b,19.2,12.4
|
3 |
wavllm_fairseq,22.400000000000002,21.6
|
|
|
4 |
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,63.0,70.8
|
6 |
mowe_audio,16.0,19.8
|
results/llama3_70b_judge/sqa.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_v1
|
2 |
-
AudioGemma2_v1,,,
|
3 |
-
test_temp,,,
|
4 |
-
audiogemma_2_singlish,,,
|
5 |
salmonn_7b,78.23529411764706,56.77325581395348,
|
6 |
wavllm_fairseq,83.92156862745098,58.54651162790698,
|
7 |
-
old_models,,,
|
8 |
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,82.99019607843137,64.94186046511628,
|
10 |
mowe_audio,76.5686274509804,,
|
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_v1
|
|
|
|
|
|
|
2 |
salmonn_7b,78.23529411764706,56.77325581395348,
|
3 |
wavllm_fairseq,83.92156862745098,58.54651162790698,
|
|
|
4 |
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,82.99019607843137,64.94186046511628,
|
6 |
mowe_audio,76.5686274509804,,
|
results/llama3_70b_judge_binary/er.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
-
AudioGemma2_v1,,,
|
3 |
-
test_temp,,,
|
4 |
-
audiogemma_2_singlish,,,
|
5 |
salmonn_7b,21.55688622754491,41.877394636015325,33.06513409961686
|
6 |
wavllm_fairseq,45.90818363273453,50.076628352490424,41.0727969348659
|
7 |
-
old_models,,,
|
8 |
Qwen2-Audio-7B-Instruct,49.30139720558882,53.486590038314176,40.536398467432946
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,34.4311377245509,43.86973180076628,33.25670498084291
|
10 |
mowe_audio,23.55289421157685,46.09195402298851,30.07662835249042
|
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
|
|
|
|
|
|
2 |
salmonn_7b,21.55688622754491,41.877394636015325,33.06513409961686
|
3 |
wavllm_fairseq,45.90818363273453,50.076628352490424,41.0727969348659
|
|
|
4 |
Qwen2-Audio-7B-Instruct,49.30139720558882,53.486590038314176,40.536398467432946
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,34.4311377245509,43.86973180076628,33.25670498084291
|
6 |
mowe_audio,23.55289421157685,46.09195402298851,30.07662835249042
|
results/llama3_70b_judge_binary/gr.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
-
AudioGemma2_v1,,
|
3 |
-
test_temp,,
|
4 |
-
audiogemma_2_singlish,,
|
5 |
salmonn_7b,88.90028723840788,51.59680638722555
|
6 |
wavllm_fairseq,69.67583093967994,
|
7 |
-
old_models,,
|
8 |
Qwen2-Audio-7B-Instruct,99.1177677472302,49.30139720558882
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,53.40582683627411,51.49700598802395
|
10 |
mowe_audio,81.20640131308986,37.82435129740519
|
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
|
|
|
|
|
|
2 |
salmonn_7b,88.90028723840788,51.59680638722555
|
3 |
wavllm_fairseq,69.67583093967994,
|
|
|
4 |
Qwen2-Audio-7B-Instruct,99.1177677472302,49.30139720558882
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,53.40582683627411,51.49700598802395
|
6 |
mowe_audio,81.20640131308986,37.82435129740519
|
results/llama3_70b_judge_binary/sqa.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
-
AudioGemma2_v1,,
|
3 |
-
test_temp,,
|
4 |
-
audiogemma_2_singlish,,
|
5 |
salmonn_7b,50.5063848524879,55.933089388395196
|
6 |
wavllm_fairseq,65.43372963452224,64.55828541557763
|
7 |
-
old_models,,
|
8 |
Qwen2-Audio-7B-Instruct,74.50462351387054,66.70151594354418
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,85.24878907970057,86.0951385258756
|
10 |
mowe_audio,75.3412593571114,
|
|
|
1 |
Model,cn_college_listen_mcq_test,dream_tts_mcq_test
|
|
|
|
|
|
|
2 |
salmonn_7b,50.5063848524879,55.933089388395196
|
3 |
wavllm_fairseq,65.43372963452224,64.55828541557763
|
|
|
4 |
Qwen2-Audio-7B-Instruct,74.50462351387054,66.70151594354418
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,85.24878907970057,86.0951385258756
|
6 |
mowe_audio,75.3412593571114,
|
results/meteor/ac.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
-
AudioGemma2_v1,,
|
3 |
-
test_temp,,
|
4 |
-
audiogemma_2_singlish,,
|
5 |
salmonn_7b,,
|
6 |
wavllm_fairseq,,
|
7 |
-
old_models,,
|
8 |
Qwen2-Audio-7B-Instruct,,
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,,
|
10 |
mowe_audio,,
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
|
|
|
|
|
|
2 |
salmonn_7b,,
|
3 |
wavllm_fairseq,,
|
|
|
4 |
Qwen2-Audio-7B-Instruct,,
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,,
|
6 |
mowe_audio,,
|
results/wer/asr.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test,imda_part1_asr_test,imda_part2_asr_test
|
2 |
-
AudioGemma2_v1,0.03089749877390878,0.06278018423591276,0.10836691280904365,0.2604876551258585,0.10853395305460918,0.16500928283176397,,,,,
|
3 |
-
test_temp,,,,,,,,,,,
|
4 |
-
audiogemma_2_singlish,0.026653336854415816,0.05710556679970492,0.10400318764018227,0.2569853056468371,0.10561083409168967,0.1646007270625118,,,,,
|
5 |
salmonn_7b,0.555834307918663,0.4180490665254317,0.3375363911542985,0.34335568653146914,0.14226764839341272,0.2687240831119165,0.3638199574803293,0.08558958163301515,0.18386810849140223,0.1581255622670608,0.6915082052617869
|
6 |
wavllm_fairseq,0.02103218017882069,0.04798834811886432,0.14533325621300636,0.3792176325635977,0.15491778414546403,0.6447482518259942,0.6671766188447099,0.06621482559171073,0.4536784258110264,0.07762498393522684,0.4511070591299818
|
7 |
-
old_models,,,,,,,,,,,
|
8 |
Qwen2-Audio-7B-Instruct,0.03201041234390915,0.06073732195887794,0.11438872500819404,0.22323599266520214,0.11891779417331638,0.9865371146513101,0.9884312836905533,0.06387282211419042,0.953518879631271,,
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,0.01831591655034519,0.03714982881570734,0.09876543209876543,0.14540692118393275,0.09515429104337297,0.11773910240019567,0.15611126487402763,0.038146268762641496,0.04754476156709803,0.0464432592211798,0.2904141703568638
|
10 |
mowe_audio,0.023258007318821442,0.05544101235175062,0.5609539784448686,0.6840078629561348,0.32259099649275863,0.32744494224633414,0.41765995821028074,0.2092544622263227,0.16780712639602907,,
|
|
|
1 |
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test,imda_part1_asr_test,imda_part2_asr_test
|
|
|
|
|
|
|
2 |
salmonn_7b,0.555834307918663,0.4180490665254317,0.3375363911542985,0.34335568653146914,0.14226764839341272,0.2687240831119165,0.3638199574803293,0.08558958163301515,0.18386810849140223,0.1581255622670608,0.6915082052617869
|
3 |
wavllm_fairseq,0.02103218017882069,0.04798834811886432,0.14533325621300636,0.3792176325635977,0.15491778414546403,0.6447482518259942,0.6671766188447099,0.06621482559171073,0.4536784258110264,0.07762498393522684,0.4511070591299818
|
|
|
4 |
Qwen2-Audio-7B-Instruct,0.03201041234390915,0.06073732195887794,0.11438872500819404,0.22323599266520214,0.11891779417331638,0.9865371146513101,0.9884312836905533,0.06387282211419042,0.953518879631271,,
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,0.01831591655034519,0.03714982881570734,0.09876543209876543,0.14540692118393275,0.09515429104337297,0.11773910240019567,0.15611126487402763,0.038146268762641496,0.04754476156709803,0.0464432592211798,0.2904141703568638
|
6 |
mowe_audio,0.023258007318821442,0.05544101235175062,0.5609539784448686,0.6840078629561348,0.32259099649275863,0.32744494224633414,0.41765995821028074,0.2092544622263227,0.16780712639602907,,
|
results/wer/cnasr.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,aishell_asr_zh_test
|
2 |
-
AudioGemma2_v1,
|
3 |
-
test_temp,
|
4 |
-
audiogemma_2_singlish,
|
5 |
salmonn_7b,0.8527647443131199
|
6 |
wavllm_fairseq,0.7054601967888183
|
7 |
-
old_models,
|
8 |
Qwen2-Audio-7B-Instruct,0.09317772366415236
|
9 |
whisper_large_v3_with_llama_3_8b_instruct,
|
10 |
mowe_audio,1.0174318465284788
|
|
|
1 |
Model,aishell_asr_zh_test
|
|
|
|
|
|
|
2 |
salmonn_7b,0.8527647443131199
|
3 |
wavllm_fairseq,0.7054601967888183
|
|
|
4 |
Qwen2-Audio-7B-Instruct,0.09317772366415236
|
5 |
whisper_large_v3_with_llama_3_8b_instruct,
|
6 |
mowe_audio,1.0174318465284788
|