diff --git a/.gitattributes b/.gitattributes index e2fdd3c78bfef7d609c3b755310e6439737447f2..0cec2425e9d79f7f0bbf28b0c229a2b6b166c6ac 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,21 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text +examples/ASR/Earnings21-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Earnings21-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Earnings21-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Earnings22-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Earnings22-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Earnings22-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Tedlium3-Longform-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Tedlium3-Longform-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Tedlium3-Longform-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Spoken-Squad-v1/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Spoken-Squad-v1/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Spoken-Squad-v1/sample_2.wav filter=lfs diff=lfs merge=lfs -text diff --git a/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow b/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..b3b064886d4b037b381a86c97e2f678c1a777048 --- /dev/null +++ b/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff53d7bcff873fea97f02e971836f24adfbfba9ebdcfc7554423bf65cb110e1d +size 970312 diff --git a/examples/AC/AudioCaps-Test/dataset_info.json b/examples/AC/AudioCaps-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..2b8cfe680340090d702b3d6b88a8378e4b9449fc --- /dev/null +++ b/examples/AC/AudioCaps-Test/dataset_info.json @@ -0,0 +1,142 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audiocap_id": { + "dtype": "string", + "_type": "Value" + }, + "start_time": { + "dtype": "string", + "_type": "Value" + }, + "youtube_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/AC/AudioCaps-Test/sample_0.wav b/examples/AC/AudioCaps-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..4331131a6543acb7d4f6e69a71708518f65bc034 Binary files /dev/null and b/examples/AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_1.wav b/examples/AC/AudioCaps-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..2fdb280d6d9a2912c0aad84bc423a3b17ab4acc7 Binary files /dev/null and b/examples/AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_2.wav b/examples/AC/AudioCaps-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..bd06790c5bba36aa0c69693e84c4bb40cdfbc290 Binary files /dev/null and b/examples/AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/AC/AudioCaps-Test/state.json b/examples/AC/AudioCaps-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..65cc6f6b6acacba97b729c23d8639556e02847f6 --- /dev/null +++ b/examples/AC/AudioCaps-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "0e301916c3676d35", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/AC/WavCaps-Test/data-00000-of-00001.arrow b/examples/AC/WavCaps-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8c9b17e83153dcec5bdd46f699aa5f2cee3eeeb8 --- /dev/null +++ b/examples/AC/WavCaps-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c824145183da91da498c0338860b6daeaed94d7c619ec9ae22a041918c61c902 +size 985360 diff --git a/examples/AC/WavCaps-Test/dataset_info.json b/examples/AC/WavCaps-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..0854513dfb2583899f6f4539b14e6b2982daf86b --- /dev/null +++ b/examples/AC/WavCaps-Test/dataset_info.json @@ -0,0 +1,138 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_path": { + "dtype": "string", + "_type": "Value" + }, + "duration": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/AC/WavCaps-Test/sample_0.wav b/examples/AC/WavCaps-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a76589f0898c5033ab99b0a15f34564fcd4e0d89 Binary files /dev/null and b/examples/AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/AC/WavCaps-Test/sample_1.wav b/examples/AC/WavCaps-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..011a74daa10980002a95da355e3534e42f69bf6c Binary files /dev/null and b/examples/AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/AC/WavCaps-Test/sample_2.wav b/examples/AC/WavCaps-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..d8afaa47b29c26dd49fbc93a95e3d8310a402a11 Binary files /dev/null and b/examples/AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/AC/WavCaps-Test/state.json b/examples/AC/WavCaps-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..1157a81d2f767520f45c0be2fe9bab4ff33f96b2 --- /dev/null +++ b/examples/AC/WavCaps-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1e570096603c2a32", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow b/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5db71d1ed8ec68e5efe54c361c696a396cf4e8aa --- /dev/null +++ b/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c07c0e241fb703dda0b7544925d0ca59bba57417daa5e08727a9edd9312e64 +size 578840 diff --git a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..50bebce9f3ed95a5e075f901d4038bf769c46d49 --- /dev/null +++ b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Gender": { + "dtype": "string", + "_type": "Value" + }, + "Nationality": { + "dtype": "string", + "_type": "Value" + }, + "VGGFace1 ID": { + "dtype": "string", + "_type": "Value" + }, + "VoxCeleb1 ID": { + "dtype": "string", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..4108fd6ea1fac52d76f936d237a7199833266c0b Binary files /dev/null and b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..aa2f6e1d60f18910793c3ec63d2107616a88ecc4 Binary files /dev/null and b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..f903c6c5ae2939f8f523c3113d25df8b1bb74d7d Binary files /dev/null and b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/state.json b/examples/AR/VoxCeleb-Accent-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c69c32faf3ae084e16287c436f540a16578faf04 --- /dev/null +++ b/examples/AR/VoxCeleb-Accent-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7017504c8eeb5d71", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASQA/AudioCaps-QA-Test/data-00000-of-00001.arrow b/examples/ASQA/AudioCaps-QA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..cf7c40d2f99c0ed01151e80489a2c4347f5ab629 --- /dev/null +++ b/examples/ASQA/AudioCaps-QA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f33a60a23ffc8fce090307d530ef03b2b8cf9852fa70418e76ed6a1d5dd978 +size 954480 diff --git a/examples/ASQA/AudioCaps-QA-Test/dataset_info.json b/examples/ASQA/AudioCaps-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..927e9073f4fbf7e4c1b46bca4fd2f1dc1cb18fa6 --- /dev/null +++ b/examples/ASQA/AudioCaps-QA-Test/dataset_info.json @@ -0,0 +1,146 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audiocap_id": { + "dtype": "string", + "_type": "Value" + }, + "caption": { + "dtype": "string", + "_type": "Value" + }, + "start_time": { + "dtype": "string", + "_type": "Value" + }, + "youtube_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASQA/AudioCaps-QA-Test/sample_0.wav b/examples/ASQA/AudioCaps-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..773c98035b78950ae26994d76316a4a602446e17 Binary files /dev/null and b/examples/ASQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/ASQA/AudioCaps-QA-Test/sample_1.wav b/examples/ASQA/AudioCaps-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..6963878bb2d524f1967934a9c615126ff93481b3 Binary files /dev/null and b/examples/ASQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/ASQA/AudioCaps-QA-Test/sample_2.wav b/examples/ASQA/AudioCaps-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..40a5d2bd7b527602fccad9f3ff5e9844f0da45e4 Binary files /dev/null and b/examples/ASQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/ASQA/AudioCaps-QA-Test/state.json b/examples/ASQA/AudioCaps-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..19d00ee6e5f98d5468330edb676d8a2f9f4159f0 --- /dev/null +++ b/examples/ASQA/AudioCaps-QA-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b4d0bc420173574a", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASQA/Clotho-AQA-Test/data-00000-of-00001.arrow b/examples/ASQA/Clotho-AQA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..df51466ac819e4f3e35ed5426530229825ea480b --- /dev/null +++ b/examples/ASQA/Clotho-AQA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76c13e88e89bb39d6a4dedc9f3bb30e128cdaaa5f68d8c0e1d804d9af5cbf68 +size 2181384 diff --git a/examples/ASQA/Clotho-AQA-Test/dataset_info.json b/examples/ASQA/Clotho-AQA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..45210944633428553291fd44b5717fceb5809e5c --- /dev/null +++ b/examples/ASQA/Clotho-AQA-Test/dataset_info.json @@ -0,0 +1,129 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": {}, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASQA/Clotho-AQA-Test/sample_0.wav b/examples/ASQA/Clotho-AQA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..71db83c4fd13beb21fc12baebabc3115bb12772f Binary files /dev/null and b/examples/ASQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/ASQA/Clotho-AQA-Test/sample_1.wav b/examples/ASQA/Clotho-AQA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e840f36885be4f6991710701ff18840e62d6932e Binary files /dev/null and b/examples/ASQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/ASQA/Clotho-AQA-Test/sample_2.wav b/examples/ASQA/Clotho-AQA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..32101df39657d72e131d224a23f61dde7aa6012f Binary files /dev/null and b/examples/ASQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/ASQA/Clotho-AQA-Test/state.json b/examples/ASQA/Clotho-AQA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..f914ee70c395c738a3b3d929a297026a62b87526 --- /dev/null +++ b/examples/ASQA/Clotho-AQA-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "3f05c76553bf311d", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASQA/WavCaps-QA-Test/data-00000-of-00001.arrow b/examples/ASQA/WavCaps-QA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..86ba108852e07ff03044fff3c832a00f39839bbc --- /dev/null +++ b/examples/ASQA/WavCaps-QA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183517b37463c54c088f5e1011721003d14587380bc111e8ad7f7cfc60fcd9e5 +size 970376 diff --git a/examples/ASQA/WavCaps-QA-Test/dataset_info.json b/examples/ASQA/WavCaps-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..8bf3ee660b297ef30a37b4ab559ea865bf89c9cf --- /dev/null +++ b/examples/ASQA/WavCaps-QA-Test/dataset_info.json @@ -0,0 +1,142 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_path": { + "dtype": "string", + "_type": "Value" + }, + "caption": { + "dtype": "string", + "_type": "Value" + }, + "duration": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASQA/WavCaps-QA-Test/sample_0.wav b/examples/ASQA/WavCaps-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..c525931f97fb9fffd107ba1f10e02ff348ee733a Binary files /dev/null and b/examples/ASQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/ASQA/WavCaps-QA-Test/sample_1.wav b/examples/ASQA/WavCaps-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..cc63fae1bd2bda6151ab60da2b2f7903191a03f3 Binary files /dev/null and b/examples/ASQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/ASQA/WavCaps-QA-Test/sample_2.wav b/examples/ASQA/WavCaps-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..27706824fec806662482ae7903c969db8a96c7ba Binary files /dev/null and b/examples/ASQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/ASQA/WavCaps-QA-Test/state.json b/examples/ASQA/WavCaps-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..30c3a60665bed0ad7c6140b5086efce6fdd486ec --- /dev/null +++ b/examples/ASQA/WavCaps-QA-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f88396310248e252", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow b/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..9ebb255230c929cad6fcfff2388a8c5fd5cbef50 --- /dev/null +++ b/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44587f333a31075993f3cc8832f8946fd59605246769dd8bf86887dcf3bd889c +size 543912 diff --git a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..0ee97efe251f1c21a87ef3c09372d7249c0d657c --- /dev/null +++ b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json @@ -0,0 +1,170 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "accents": { + "dtype": "null", + "_type": "Value" + }, + "age": { + "dtype": "null", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "down_votes": { + "dtype": "int64", + "_type": "Value" + }, + "gender": { + "dtype": "null", + "_type": "Value" + }, + "language": { + "dtype": "string", + "_type": "Value" + }, + "locale": { + "dtype": "string", + "_type": "Value" + }, + "segment": { + "dtype": "null", + "_type": "Value" + }, + "up_votes": { + "dtype": "int64", + "_type": "Value" + }, + "variant": { + "dtype": "null", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..0037bc557881e74bdcc687e1d28af287a6c22344 Binary files /dev/null and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..bc3c6afd5fe31546b091231f643b37f05ba11595 Binary files /dev/null and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..4245350f9951353f48a156651b2b0bc3629ae84a Binary files /dev/null and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/state.json b/examples/ASR/Common-Voice-15-En-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..0dc2de30ddac4cca90d5224840099629052ffe14 --- /dev/null +++ b/examples/ASR/Common-Voice-15-En-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "30218d56801da2e8", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow b/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c99c2a3f84c01c3b2ad7555d79c55dbdb2ed3da2 --- /dev/null +++ b/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33dce5920f131621276ca79ef824fc52d5d5bb043d7672458fdbe037c7fb60fd +size 310572568 diff --git a/examples/ASR/Earnings21-Test/dataset_info.json b/examples/ASR/Earnings21-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..08b3d046e3630b0d47e267cf866bc1ce79106af7 --- /dev/null +++ b/examples/ASR/Earnings21-Test/dataset_info.json @@ -0,0 +1,134 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Earnings21-Test/sample_0.wav b/examples/ASR/Earnings21-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..00b589a907fb97c85923495ea5e471267435498a --- /dev/null +++ b/examples/ASR/Earnings21-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8e88eccc61c5e8a5e62867c52a8ccfc4e108a5f48459f7df0eb1e9ae7f16d4f +size 139072236 diff --git a/examples/ASR/Earnings21-Test/sample_1.wav b/examples/ASR/Earnings21-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..41480ae277b97f832773cd9d05fb187bb9e6c0e4 --- /dev/null +++ b/examples/ASR/Earnings21-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd3ce7847b7fb0f600bb3faeca3cd2a8f4992fd1de17bd831ef6ccfb2623f33 +size 55065644 diff --git a/examples/ASR/Earnings21-Test/sample_2.wav b/examples/ASR/Earnings21-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..17ba2bbd1608c96c62a420bd267cf9270da8f035 --- /dev/null +++ b/examples/ASR/Earnings21-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607725d2a33f206a0f00668f1907f4d997fc7dbe7d1e1f9c82045496412bd8bd +size 115039268 diff --git a/examples/ASR/Earnings21-Test/state.json b/examples/ASR/Earnings21-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..206bda8429e14fa71c1fbe9ddf26f730e0359b97 --- /dev/null +++ b/examples/ASR/Earnings21-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2e3dea299b387757", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow b/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..01756fb35be56da03615aa94008d40f4567e2403 --- /dev/null +++ b/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a45b6f2b6f1eaefc76f48f4778b5d5d46793e0a0fc846b1c2dd4b639dd173f +size 299513312 diff --git a/examples/ASR/Earnings22-Test/dataset_info.json b/examples/ASR/Earnings22-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..08b3d046e3630b0d47e267cf866bc1ce79106af7 --- /dev/null +++ b/examples/ASR/Earnings22-Test/dataset_info.json @@ -0,0 +1,134 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Earnings22-Test/sample_0.wav b/examples/ASR/Earnings22-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..d637495fa7239cdf1a837a632f3d1b3b02b7b16a --- /dev/null +++ b/examples/ASR/Earnings22-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df99676242890437003f9de1ebe2df3ea1e55365a76eab1b08bbe5ef6c235bf0 +size 39828524 diff --git a/examples/ASR/Earnings22-Test/sample_1.wav b/examples/ASR/Earnings22-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..0e5a289da58dd8768e1e3e4ef3858f2c6b5d7e7e --- /dev/null +++ b/examples/ASR/Earnings22-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46cd00f65ad5466bbe58fcbaa45dbbdc62921f876890b78da515b3574178691 +size 140347052 diff --git a/examples/ASR/Earnings22-Test/sample_2.wav b/examples/ASR/Earnings22-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..45f6f543dc102d5d73cb768a62c2e9e1846ba74c --- /dev/null +++ b/examples/ASR/Earnings22-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7144b858f3de344f601031d093c3403199dc04cad49287cf71dd6ea04ca4dd6 +size 118169900 diff --git a/examples/ASR/Earnings22-Test/state.json b/examples/ASR/Earnings22-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..643700cb9278c071e7a0ca46b2760cf18034ad11 --- /dev/null +++ b/examples/ASR/Earnings22-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c2ddf91e8ccb230c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow b/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..80e7846cdbade2e98a13a8439818fb634d13fbc3 --- /dev/null +++ b/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:573e5fd3cdcb0c5b27f9958ba68113a455fa426dec336f7635677c3130a265fe +size 693672 diff --git a/examples/ASR/GigaSpeech-Test/dataset_info.json b/examples/ASR/GigaSpeech-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..7056dd861bf6cf034028f9f01c5153ac3e4ec304 --- /dev/null +++ b/examples/ASR/GigaSpeech-Test/dataset_info.json @@ -0,0 +1,162 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_id": { + "dtype": "string", + "_type": "Value" + }, + "begin_time": { + "dtype": "float64", + "_type": "Value" + }, + "category": { + "dtype": "int64", + "_type": "Value" + }, + "end_time": { + "dtype": "float64", + "_type": "Value" + }, + "segment_id": { + "dtype": "string", + "_type": "Value" + }, + "source": { + "dtype": "int64", + "_type": "Value" + }, + "speaker": { + "dtype": "string", + "_type": "Value" + }, + "url": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/GigaSpeech-Test/sample_0.wav b/examples/ASR/GigaSpeech-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..f2afdc748f9c58eb2e6cd9793b8a642471d23c31 Binary files /dev/null and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_1.wav b/examples/ASR/GigaSpeech-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..942b01315d851d0ed1a72ae561e6859c9fbb32fd Binary files /dev/null and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_2.wav b/examples/ASR/GigaSpeech-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..0e61bad88fd0422f1f69e07c1d79da115df77c7e Binary files /dev/null and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ diff --git a/examples/ASR/GigaSpeech-Test/state.json b/examples/ASR/GigaSpeech-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..e61469ffa3f7414712f1f7fa7fa6525b49e5cac4 --- /dev/null +++ b/examples/ASR/GigaSpeech-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "0032f92a85e94025", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c7bd43926e41e9768d8da89f5bb2a46ff1a302d2 --- /dev/null +++ b/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136cd0551a26eb0407715350491a49cd3c80cf8440c0c5d6b5079bdf1d81d260 +size 382752 diff --git a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5649fa6ac42fc9645c7aea4be9d03cc30d774647 --- /dev/null +++ b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "channel": { + "dtype": "string", + "_type": "Value" + }, + "session": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "device_c0": { + "dtype": "string", + "_type": "Value" + }, + "device_c1": { + "dtype": "string", + "_type": "Value" + }, + "device_c2": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "part1_id": { + "dtype": "string", + "_type": "Value" + }, + "part2_id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..9f1b0abb84dc9bb30a04f2b08eb0f0be0ec33cf4 Binary files /dev/null and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..2f5b49a926a9509ad34700c9d00df8bbda49fd57 Binary files /dev/null and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..f36eb9efe06c44ea5cbd63d26257e8d37f0079b7 Binary files /dev/null and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/state.json b/examples/ASR/IMDA-Part1-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e63ed10109432151f62c147e30d2704343a756d --- /dev/null +++ b/examples/ASR/IMDA-Part1-ASR-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9b9c1437475afa9d", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..7ea3bfcd404232b6ae9582683615a77c2bb3ac25 --- /dev/null +++ b/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad482831507358784822a617955723251f237ff24e9dfba98265ebd1bebfacf5 +size 347952 diff --git a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5649fa6ac42fc9645c7aea4be9d03cc30d774647 --- /dev/null +++ b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "channel": { + "dtype": "string", + "_type": "Value" + }, + "session": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "device_c0": { + "dtype": "string", + "_type": "Value" + }, + "device_c1": { + "dtype": "string", + "_type": "Value" + }, + "device_c2": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "part1_id": { + "dtype": "string", + "_type": "Value" + }, + "part2_id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..fe0422bcb8356543fbde4cc9981779ca712dcc22 Binary files /dev/null and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..4f6ec7d97638278d3cf05791b44d2d072ab161ba Binary files /dev/null and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..d1e955f3a2f6d402e2661703d4880a4881c088c8 Binary files /dev/null and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/state.json b/examples/ASR/IMDA-Part2-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..2dbed7b83dd6665624b87d3b11c514bc97e22f29 --- /dev/null +++ b/examples/ASR/IMDA-Part2-ASR-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1b048a2e54d0c002", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part3-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part3-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d27425a479726f8ad38bfdbb8b536211af5a0bbc --- /dev/null +++ b/examples/ASR/IMDA-Part3-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db3f223af176f6b578f45ccd0505550aa46fd3eaba7177ff81acca390869b94 +size 150304 diff --git a/examples/ASR/IMDA-Part3-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part3-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..6b2fc6f67812ccc6fa242d0fa24b131d95fbfd0e --- /dev/null +++ b/examples/ASR/IMDA-Part3-ASR-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "room": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "age": { + "dtype": "string", + "_type": "Value" + }, + "education_level": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "first_language": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "occupation": { + "dtype": "string", + "_type": "Value" + }, + "partner_id": { + "dtype": "string", + "_type": "Value" + }, + "partner_relationship": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + }, + "spoken_language": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part3-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part3-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..cc293ffd19e27ca9c52119bbf86fa7a093965541 Binary files /dev/null and b/examples/ASR/IMDA-Part3-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part3-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part3-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..edfeadb7d81fff2c2d247697a6e151ab70b206ef Binary files /dev/null and b/examples/ASR/IMDA-Part3-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part3-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part3-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..323f64ad55a8996871e74aed993cb0d5ad5e959e Binary files /dev/null and b/examples/ASR/IMDA-Part3-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part3-ASR-Test/state.json b/examples/ASR/IMDA-Part3-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..0a37aa15934206c94f2cac016805ecd626bd9313 --- /dev/null +++ b/examples/ASR/IMDA-Part3-ASR-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "50379cc99d490806", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part4-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part4-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0bffb4198dcf303f23a46992b32ff5303d950e7b --- /dev/null +++ b/examples/ASR/IMDA-Part4-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c006476408bde2b5af4bd9dcd1d592d1ffc794e88c6628706dfc40f9b5b434 +size 288880 diff --git a/examples/ASR/IMDA-Part4-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part4-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..cdaed03360a03e2929998b99a463813a9ba6b354 --- /dev/null +++ b/examples/ASR/IMDA-Part4-ASR-Test/dataset_info.json @@ -0,0 +1,158 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "lang": { + "dtype": "string", + "_type": "Value" + }, + "room": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "age": { + "dtype": "string", + "_type": "Value" + }, + "dominant_language": { + "dtype": "string", + "_type": "Value" + }, + "education_level": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "first_language": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "occupation": { + "dtype": "string", + "_type": "Value" + }, + "partner_id": { + "dtype": "string", + "_type": "Value" + }, + "partner_relationship": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + }, + "spoken_language": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part4-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part4-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..cc93315b57723168a5dddbcccef96e923fe33031 Binary files /dev/null and b/examples/ASR/IMDA-Part4-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part4-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part4-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e3df575cbb0db4922e7d1e219f8e3aae1eb43f59 Binary files /dev/null and b/examples/ASR/IMDA-Part4-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part4-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part4-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8840e9cc8191fb2a9e069a617d8d610f9baf4721 Binary files /dev/null and b/examples/ASR/IMDA-Part4-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part4-ASR-Test/state.json b/examples/ASR/IMDA-Part4-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..322bbde56153280ecb31d565c72d6ccf687b6985 --- /dev/null +++ b/examples/ASR/IMDA-Part4-ASR-Test/state.json @@ -0,0 +1,21 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "dd44449fbde55a5c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part5-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part5-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..a1aaae8fcaa7f10980ed9e9601e35071bd9dc2b3 --- /dev/null +++ b/examples/ASR/IMDA-Part5-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067d7e58f5895c6d731f7d5184c1cbd8240882e8e70e0d1be5692604c19a959f +size 1187072 diff --git a/examples/ASR/IMDA-Part5-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part5-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..b1af0caaaadbd9314186d7c2a4b99f8d76a0949c --- /dev/null +++ b/examples/ASR/IMDA-Part5-ASR-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "senario": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "age": { + "dtype": "string", + "_type": "Value" + }, + "dominant_language": { + "dtype": "string", + "_type": "Value" + }, + "education_level": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "first_language": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "occupation": { + "dtype": "string", + "_type": "Value" + }, + "partner_relationship": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + }, + "spoken_language": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part5-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part5-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..b82062c90409fc2b3772aaa02b4b99b00c85ef75 Binary files /dev/null and b/examples/ASR/IMDA-Part5-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part5-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part5-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..1faefc12bfb26af9557a2b6c54357f1e9045f2c7 Binary files /dev/null and b/examples/ASR/IMDA-Part5-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part5-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part5-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..5ad8d5fd9395f9a43ec23875f76b860afd799da7 Binary files /dev/null and b/examples/ASR/IMDA-Part5-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part5-ASR-Test/state.json b/examples/ASR/IMDA-Part5-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..92f6880898150573e9e0d672c9335936367e102e --- /dev/null +++ b/examples/ASR/IMDA-Part5-ASR-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4eaf94926503a7d0", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part6-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part6-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..4ca9f43c9d1bb5c38051a1ad8ed4b36afddebef3 --- /dev/null +++ b/examples/ASR/IMDA-Part6-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08457f427ddcaff004b982657d275b900997fbef03bf998c4aa43f20b994de4b +size 468952 diff --git a/examples/ASR/IMDA-Part6-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part6-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..62322a16566086537753d085921866255a2dae72 --- /dev/null +++ b/examples/ASR/IMDA-Part6-ASR-Test/dataset_info.json @@ -0,0 +1,114 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "senario": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/IMDA-Part6-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part6-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ae09b354abd6f045d6125267c91fdc0b5935ccc7 Binary files /dev/null and b/examples/ASR/IMDA-Part6-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part6-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part6-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..6e80c227815625fe9c5057ce074e0d091de68630 Binary files /dev/null and b/examples/ASR/IMDA-Part6-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part6-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part6-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..6b280ebe82ad68bdefc31ffed4497229141ce714 Binary files /dev/null and b/examples/ASR/IMDA-Part6-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part6-ASR-Test/state.json b/examples/ASR/IMDA-Part6-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3e26020446a936e1b8ecf11c672bcd5287efe6 --- /dev/null +++ b/examples/ASR/IMDA-Part6-ASR-Test/state.json @@ -0,0 +1,21 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1e7481d3593b76f4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow b/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d700f9546d3292dede0d5f946725fee43d739733 --- /dev/null +++ b/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a1a86f5cd34ee42918c78eef50f95a53953fd352fb0ecc758543025634d2b7 +size 555160 diff --git a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..b7936842ee323be9e4c2360418e44a417cf0f626 --- /dev/null +++ b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json @@ -0,0 +1,146 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..266ab3ddafe3c5d192606c6cdf579238b0150e62 Binary files /dev/null and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..238f6c95cf91c6bff49636351557961b3e06d356 Binary files /dev/null and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..f127981ac0a35d93e4fcf3607a84858e9ea276d9 Binary files /dev/null and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/state.json b/examples/ASR/LibriSpeech-Test-Clean/state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1c2c843a1f7da02ed60cbeb427026c76972e7d --- /dev/null +++ b/examples/ASR/LibriSpeech-Test-Clean/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c56d3af03a1dc565", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow b/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8e4284b54778666bc3ce9cf7c54e31d950dd8383 --- /dev/null +++ b/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87fbeb401741f3fe2c384cd859b4065c339239cce8d5718ab8232d873f00a75 +size 873440 diff --git a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..b7936842ee323be9e4c2360418e44a417cf0f626 --- /dev/null +++ b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json @@ -0,0 +1,146 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..9ceae84addeaa1cfd86a5ae164ba3f48c7994c2f Binary files /dev/null and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..5facbda4871665edf9aefcd3e58b6056b296c233 Binary files /dev/null and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..237b56f851d3989319b0fe06ce85878ab69c4850 Binary files /dev/null and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/state.json b/examples/ASR/LibriSpeech-Test-Other/state.json new file mode 100644 index 0000000000000000000000000000000000000000..cd23da8c954dfb155a1fd333adb7cfbdfcde8eaf --- /dev/null +++ b/examples/ASR/LibriSpeech-Test-Other/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "5fc28a37097fe19f", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow b/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..7a6adf5c59f4187480c64b55f30dcf754db07d1c --- /dev/null +++ b/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ee24614187cbaab9f8ded6901791a9aaf5bfdda56a358dd8226be5c5e0462a +size 810976 diff --git a/examples/ASR/Peoples-Speech-Test/dataset_info.json b/examples/ASR/Peoples-Speech-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..0a6ab198f1ae81f599b4abcef59dba0043ab934e --- /dev/null +++ b/examples/ASR/Peoples-Speech-Test/dataset_info.json @@ -0,0 +1,138 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "duration_ms": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Peoples-Speech-Test/sample_0.wav b/examples/ASR/Peoples-Speech-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..560d3df564e6658b9d7040aff205a47737e16dd2 Binary files /dev/null and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_1.wav b/examples/ASR/Peoples-Speech-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..1513c2587700588fda3c13c14f037bb821dfd6df Binary files /dev/null and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_2.wav b/examples/ASR/Peoples-Speech-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8ab7248ab6278f1ad098d67fee87fec1a934b0bc Binary files /dev/null and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/state.json b/examples/ASR/Peoples-Speech-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..37d3ef70f3164ef13c8bd4d576e7d8b4eb9a4511 --- /dev/null +++ b/examples/ASR/Peoples-Speech-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "6203edc47e9a3c56", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow b/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..9c0fb1b137104b7fedf1980471505c2ae8b630ff --- /dev/null +++ b/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9264049c1cb1e16e464a0ee978e5386733025f4fd2f135eea86def1600d3826c +size 101452440 diff --git a/examples/ASR/Tedlium3-Longform-Test/dataset_info.json b/examples/ASR/Tedlium3-Longform-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ce0eff747512d4783e49763b36beac50f103dce5 --- /dev/null +++ b/examples/ASR/Tedlium3-Longform-Test/dataset_info.json @@ -0,0 +1,146 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Tedlium3-Longform-Test/sample_0.wav b/examples/ASR/Tedlium3-Longform-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..8132b0bb9c72b9c26882c19cdb542c31580e3271 --- /dev/null +++ b/examples/ASR/Tedlium3-Longform-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4219fea24ab3ffdc256b7f6c5f0d0ce8dcd9eb0f80cf15a9d1b3afa6b6e279 +size 26867124 diff --git a/examples/ASR/Tedlium3-Longform-Test/sample_1.wav b/examples/ASR/Tedlium3-Longform-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..17eccb9b542ec47bf29066074f7435f6b06f42cd --- /dev/null +++ b/examples/ASR/Tedlium3-Longform-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0361b7dc4a9437a698a1d28fb3e73718810a6d685f7932f2a04997ad0475b182 +size 36730348 diff --git a/examples/ASR/Tedlium3-Longform-Test/sample_2.wav b/examples/ASR/Tedlium3-Longform-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2eff19f1f7ce5c124d20e874f244975410f7113c --- /dev/null +++ b/examples/ASR/Tedlium3-Longform-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c1ab4c6363f3db0f5fca8bdc5f2eb780522a08da3cd00bf6a4b1ea7e3cc0f8 +size 37362604 diff --git a/examples/ASR/Tedlium3-Longform-Test/state.json b/examples/ASR/Tedlium3-Longform-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a1041e8ced6bf9491bb3d91d8d0f8d5814c98494 --- /dev/null +++ b/examples/ASR/Tedlium3-Longform-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f89ae31db7413bf4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow b/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..90b3b6d6bc83f53d0b78e62028dbffda36c2cec3 --- /dev/null +++ b/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6501f1e1b94e6fa24eddb4b65dd97ae10f4c3df45db946f3bc3b927fafbca908 +size 633448 diff --git a/examples/ASR/Tedlium3-Test/dataset_info.json b/examples/ASR/Tedlium3-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ce0eff747512d4783e49763b36beac50f103dce5 --- /dev/null +++ b/examples/ASR/Tedlium3-Test/dataset_info.json @@ -0,0 +1,146 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Tedlium3-Test/sample_0.wav b/examples/ASR/Tedlium3-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..c7bd8c3b68ed10585921e11bee7d24a34339f78c Binary files /dev/null and b/examples/ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_1.wav b/examples/ASR/Tedlium3-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..3b684a181c9fd4ac1d95c2fc7988379a26f677cb Binary files /dev/null and b/examples/ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_2.wav b/examples/ASR/Tedlium3-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b1ab681404201295b8fb1355c1c29d92147a750e Binary files /dev/null and b/examples/ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/ASR/Tedlium3-Test/state.json b/examples/ASR/Tedlium3-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0eec8964db7c884c56d55d6cae98406a6c52de4 --- /dev/null +++ b/examples/ASR/Tedlium3-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fde82500f706458b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow b/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0742546a92f1cc2786d08b171675bc65c269cf14 --- /dev/null +++ b/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ba676c2fabf603c469151ad388808411a6293abe2f5bad37536e4e814de359 +size 462192 diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..53a69797f0e56551f11dd7fc754525541592260d --- /dev/null +++ b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json @@ -0,0 +1,126 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker": { + "gender": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..e60df4c6f8b76165aa68bc4690c4890d7bb06ca0 Binary files /dev/null and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..b7e3953ebefb7d00f2f53a128da4132ebb400f17 Binary files /dev/null and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..9e7ace99cf7ee186c5ff75811dd2825faf401818 Binary files /dev/null and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/state.json b/examples/CNASR/Aishell-ASR-ZH-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..362c37b973a7be7bfabe4775b6a93b2f3e038e13 --- /dev/null +++ b/examples/CNASR/Aishell-ASR-ZH-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9df39c289a58da05", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow b/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..018fddbba6a8a26194143f46120bf324bce8d466 --- /dev/null +++ b/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2842c3a761bbf2b7412b1a1bbd39651fe883a81748bbe0002a2ac57967332458 +size 572400 diff --git a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..6ee0bdbe32fc2abb53eb378278667ae7c65706ee --- /dev/null +++ b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "End Time": { + "dtype": "float64", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Start Time": { + "dtype": "float64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..9663058cec25e36c80c34ec1b437201c0ab36405 Binary files /dev/null and b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..d0f66c928e3eec7d9344ae8c1a1b1e953128ded8 Binary files /dev/null and b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..28a9ee62e794164d0e553f55ad644ff6d93439f4 Binary files /dev/null and b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/state.json b/examples/ER/IEMOCAP-Emotion-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..b9b947f65b2b046e7afd6aff693633ea6b20392a --- /dev/null +++ b/examples/ER/IEMOCAP-Emotion-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "11c1cad506e32e3a", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow b/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dd6933571069f896752c649cfa15f20a460a252e --- /dev/null +++ b/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8189dcd76878abd1e2e5a488f466447cbce4974aaee026b605151c9e8c098f5 +size 437104 diff --git a/examples/ER/MELD-Emotion-Test/dataset_info.json b/examples/ER/MELD-Emotion-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..d1510e9c9bcd20bf4791aa110420e7c00f194d06 --- /dev/null +++ b/examples/ER/MELD-Emotion-Test/dataset_info.json @@ -0,0 +1,166 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "EndTime": { + "dtype": "string", + "_type": "Value" + }, + "Episode": { + "dtype": "int64", + "_type": "Value" + }, + "Gender": { + "dtype": "null", + "_type": "Value" + }, + "Season": { + "dtype": "int64", + "_type": "Value" + }, + "Sentiment": { + "dtype": "int64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + }, + "StartTime": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ER/MELD-Emotion-Test/sample_0.wav b/examples/ER/MELD-Emotion-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..1290f04beb0655f325c70a66d139e91a327d44d6 Binary files /dev/null and b/examples/ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_1.wav b/examples/ER/MELD-Emotion-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..6102c50ca250d0aea84867bff8a8447030794c74 Binary files /dev/null and b/examples/ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_2.wav b/examples/ER/MELD-Emotion-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..a14d5451f401e7479f3b88d3d98d3c7542aaf060 Binary files /dev/null and b/examples/ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Emotion-Test/state.json b/examples/ER/MELD-Emotion-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..5fea2bae49831f0cb5cb4c00d5d87f97de7c6781 --- /dev/null +++ b/examples/ER/MELD-Emotion-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b26eb8a553b6391d", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow b/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c6038d789a87c8ebbf04acd99d344ba57b09086c --- /dev/null +++ b/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303e3113bb1fdf1c0eb890a96c65376f5324eea8b91483a933eee3b5665f91f4 +size 168240 diff --git a/examples/ER/MELD-Sentiment-Test/dataset_info.json b/examples/ER/MELD-Sentiment-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..d1510e9c9bcd20bf4791aa110420e7c00f194d06 --- /dev/null +++ b/examples/ER/MELD-Sentiment-Test/dataset_info.json @@ -0,0 +1,166 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "EndTime": { + "dtype": "string", + "_type": "Value" + }, + "Episode": { + "dtype": "int64", + "_type": "Value" + }, + "Gender": { + "dtype": "null", + "_type": "Value" + }, + "Season": { + "dtype": "int64", + "_type": "Value" + }, + "Sentiment": { + "dtype": "int64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + }, + "StartTime": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ER/MELD-Sentiment-Test/sample_0.wav b/examples/ER/MELD-Sentiment-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a41e977ceb90459a322dd63128ffbfbd8bef57d1 Binary files /dev/null and b/examples/ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_1.wav b/examples/ER/MELD-Sentiment-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..2231159f9ca4b08102222cfe0aa216f816bf682d Binary files /dev/null and b/examples/ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_2.wav b/examples/ER/MELD-Sentiment-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..c01afd9a33375108c80c071096b918a45e3f0023 Binary files /dev/null and b/examples/ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/state.json b/examples/ER/MELD-Sentiment-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c151f23c31d06eaed079d3301e8b0dd23727cb93 --- /dev/null +++ b/examples/ER/MELD-Sentiment-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b95b9f7545a82910", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow b/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..cd82ad822e55c545fd965980d523a0d97a73d6f7 --- /dev/null +++ b/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a76d1648889e1da12a58ad0c916c31712239024f97056d771007031a5c3442 +size 517056 diff --git a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..6ee0bdbe32fc2abb53eb378278667ae7c65706ee --- /dev/null +++ b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "End Time": { + "dtype": "float64", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Start Time": { + "dtype": "float64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..b2fdff74b94ec6049256083752dec82c55fdd73f Binary files /dev/null and b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..0bc8ed44e57f6996d20cd15880fcdc8e1b6e80c6 Binary files /dev/null and b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..900b477cbf7e25f5ec19c3ec6150fe577d25e600 Binary files /dev/null and b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/state.json b/examples/GR/IEMOCAP-Gender-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..56e93d6ac7909fbb8ec066090da0fd4eea1cbaa6 --- /dev/null +++ b/examples/GR/IEMOCAP-Gender-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fd5ddfda82c4aaa9", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow b/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..05bf7296fe347a30bf76b026dd08ab4d79deeba4 --- /dev/null +++ b/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:493b7382a2b1aa901942fa7bcb30ef51797aac39828006a178aefae9ff1a993e +size 622216 diff --git a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..50bebce9f3ed95a5e075f901d4038bf769c46d49 --- /dev/null +++ b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Gender": { + "dtype": "string", + "_type": "Value" + }, + "Nationality": { + "dtype": "string", + "_type": "Value" + }, + "VGGFace1 ID": { + "dtype": "string", + "_type": "Value" + }, + "VoxCeleb1 ID": { + "dtype": "string", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..c011ce7fcde186b0e52ac1a4646a86924f2614dc Binary files /dev/null and b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..b7862b32a295e4d9716f99115f35df0b9188b931 Binary files /dev/null and b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..c4018fc0c75db7e6b96e7ae41af0e1e605d53630 Binary files /dev/null and b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/state.json b/examples/GR/VoxCeleb-Gender-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3c601cf05613a921e17d2000e8c76f38f319136f --- /dev/null +++ b/examples/GR/VoxCeleb-Gender-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "38fda24672a2ee02", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow b/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c278ed60e5e4eacd0623c14b19f17e3fb6dc9881 --- /dev/null +++ b/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cf1b882a819ad04cc16d81b42dd8de6ec402b00537887845c33ce283385eb3 +size 458128 diff --git a/examples/SI/ALPACA-Audio-Test/dataset_info.json b/examples/SI/ALPACA-Audio-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..7dd927db4ab336b38a99b686aced46f2e4d9a5cb --- /dev/null +++ b/examples/SI/ALPACA-Audio-Test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SI/ALPACA-Audio-Test/sample_0.wav b/examples/SI/ALPACA-Audio-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..3d516af829afc1afcea07064b2aaaec734175e61 Binary files /dev/null and b/examples/SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_1.wav b/examples/SI/ALPACA-Audio-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..3707bfb5a929acff874947ad6ea76b902ddbc2bf Binary files /dev/null and b/examples/SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_2.wav b/examples/SI/ALPACA-Audio-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..70248c9fd2ff3b5792099536311d05bed53fba47 Binary files /dev/null and b/examples/SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/state.json b/examples/SI/ALPACA-Audio-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..cba82888d784fef2f53a04b162e712eb4813e7dd --- /dev/null +++ b/examples/SI/ALPACA-Audio-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b48e6d66699d09cc", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow b/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..35154d6b77f4255cc194837e18b4f701795e3c77 --- /dev/null +++ b/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7748a3da55014d5c3b955a7eeb7c03a43a6cc8735faba6e4c9c756b2940cdc5a +size 452872 diff --git a/examples/SI/OpenHermes-Audio-Test/dataset_info.json b/examples/SI/OpenHermes-Audio-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..454bcaf8f0186d26601a0107af54dc2e906e4091 --- /dev/null +++ b/examples/SI/OpenHermes-Audio-Test/dataset_info.json @@ -0,0 +1,56 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SI/OpenHermes-Audio-Test/sample_0.wav b/examples/SI/OpenHermes-Audio-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..3a11a8017ed68f552b088939360efbd713fb23d3 Binary files /dev/null and b/examples/SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_1.wav b/examples/SI/OpenHermes-Audio-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..c5bb85768cc4a7c6f7206fc9c56412a614d08d9a Binary files /dev/null and b/examples/SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_2.wav b/examples/SI/OpenHermes-Audio-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..63ae3d47c286000d5b4180e65a0aa12437dbe7d6 Binary files /dev/null and b/examples/SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/state.json b/examples/SI/OpenHermes-Audio-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..7dd94d080c16d3c9654a1ba57c8142e0b9c1380d --- /dev/null +++ b/examples/SI/OpenHermes-Audio-Test/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2edb418fca8ac348", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow b/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0b86442465f821173a22ba9723daf4dc44f8ba2b --- /dev/null +++ b/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28bc46da28428d3b73040f668cbc85db1536ca8e44c8e158c671963aad4cb0e4 +size 1918152 diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..c3526b447e28b1c62fece39889011d58962d741f --- /dev/null +++ b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -0,0 +1,124 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_name": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "dtype": "string", + "_type": "Value" + }, + "mc_answer": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..6923b4c01dd221f14c21f85df1e8165eb5b59bbf Binary files /dev/null and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..750f1b8414060167651ac33128408b510b1545ca Binary files /dev/null and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..0a7aa93de8bade69d708bbf36a8954843b02c2cb Binary files /dev/null and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/state.json b/examples/SQA/CN-College-Listen-MCQ-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..75ad396b15b37427ae3dc61c702735f6de828994 --- /dev/null +++ b/examples/SQA/CN-College-Listen-MCQ-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a9d1aad6ff31612e", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow b/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0b25a112d290d54981e0b560352694831bad5fe9 --- /dev/null +++ b/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fecdfef3a25aa2495ab3197b2fe30fa77cfb4c6714b35815f433e52d176d8d9 +size 2217584 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..c4eadac83d7d01b0964e29d70f224e2d0a3a9246 --- /dev/null +++ b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -0,0 +1,138 @@ +{ + "citation": "", + "description": "", + "features": { + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "dialogue": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "dialogue_id": { + "dtype": "string", + "_type": "Value" + }, + "id": { + "dtype": "int64", + "_type": "Value" + }, + "mc_answer": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..3af6793b18199045cab70e74c5ec3ce96d7f16dc --- /dev/null +++ b/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899682e40d59cbf79cdd27fe1ff7a108b6349159defaa8bbea9c93cf6f2fedc0 +size 1278380 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..36cb70eb458d7650de9d8fce1d8b46dac6ca53f2 Binary files /dev/null and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..845c1cc2376e7b81c3698c53151bd09c119b79a5 Binary files /dev/null and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/state.json b/examples/SQA/DREAM-TTS-MCQ-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..70faed9032446a81505aa07f10cc97f424fd3026 --- /dev/null +++ b/examples/SQA/DREAM-TTS-MCQ-Test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e5f8ea158b4a9ca3", + "_format_columns": [ + "answer", + "context", + "instruction", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow b/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..59e07ca9393b991fff31f4d506cc0aecb9ad826e --- /dev/null +++ b/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5be42e78f6f7b3e6a0e3965bbad83b99ad9dcfbbb60edeba20c2e126afa9c8 +size 3873408 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json b/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5b865ccbdc90f460864fa658307b1fd6e12b5ac6 --- /dev/null +++ b/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Answer Score": { + "dtype": "float64", + "_type": "Value" + }, + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Question Score": { + "dtype": "float64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..bb649d8f669f8cff91e61cfadde01620e5fc6fa9 --- /dev/null +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf01f3c82a571576a2c1e4968142684d1b43a81187fb190d1105a907bb2e90a +size 1400162 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..5c04ae7e9bc5ab1b45a159b3b5928e31db2f2198 --- /dev/null +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b32f4f4294b755fd1d61e0f450f7e11a566fde6d8011d524476569c8ca65d8 +size 1524614 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..19bef4b0a13042e08e5c59f0bdde09868c33bff1 Binary files /dev/null and b/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav differ diff --git a/examples/SQA/Public-SG-Speech-QA-Test/state.json b/examples/SQA/Public-SG-Speech-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..579a4a84b69f43df3d260b1e814859aa751a0126 --- /dev/null +++ b/examples/SQA/Public-SG-Speech-QA-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "77292393d596f5fa", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow b/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..1b56d95162403fbb91b0c2fac05154f6167025a3 --- /dev/null +++ b/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc3fbcd2d7a672e863657eadb4c19c1f83bf2bd6964e41622311b1da913a3d7 +size 5589160 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..d0f327fa13416dff7d7cc45e7f9652d37ea2b296 --- /dev/null +++ b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json @@ -0,0 +1,179 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "array": { + "feature": { + "dtype": "float64", + "_type": "Value" + }, + "_type": "Sequence" + }, + "path": { + "dtype": "null", + "_type": "Value" + }, + "sampling_rate": { + "dtype": "int64", + "_type": "Value" + } + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "document_id": { + "dtype": "string", + "_type": "Value" + }, + "document_speaker_id": { + "dtype": "string", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "normalized_document_text": { + "dtype": "string", + "_type": "Value" + }, + "normalized_question_text": { + "dtype": "string", + "_type": "Value" + }, + "question_speaker_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..1e8d727fcd2c976bd37a9cab862053a06814e000 --- /dev/null +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f611c50ffa4ec236f7652646ef655e6511768b2ed0d1cf0a0e87a89ea05ca46e +size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..faa36bf23e79182c248128fb398f6bb85e5ba4ea --- /dev/null +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f753fe113aabdcee26defc81a50f4dc985086c81c44abb2dbbca15c969faa999 +size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..3187166b9ccac7a8793e5f27d3de4feaa3fe24bd --- /dev/null +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ad2ff1b82c3abf752a38d5ef6db2ffe35d3b7ce103501d5e17dc190082da14 +size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/state.json b/examples/SQA/SLUE-P2-SQA5-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..5c88181b585af5561ec3cfbd92c9d80a9e2f1f57 --- /dev/null +++ b/examples/SQA/SLUE-P2-SQA5-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "35d15f00aac76485", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow b/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..24538529c4259feb292c0ccd989e952ae1cf58f1 --- /dev/null +++ b/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75afcf7693963ab137b8504ca60ce1894fceb08fd8d98b921f43c00fe5995820 +size 5069224 diff --git a/examples/SQA/Spoken-Squad-v1/dataset_info.json b/examples/SQA/Spoken-Squad-v1/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..052bbe8b5bd56a75f35ccd52b7495387b446bc83 --- /dev/null +++ b/examples/SQA/Spoken-Squad-v1/dataset_info.json @@ -0,0 +1,64 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "paragraph_id": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + }, + "topic_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SQA/Spoken-Squad-v1/sample_0.wav b/examples/SQA/Spoken-Squad-v1/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..182c444046b5c55624be45dec892f8e09cb459e2 --- /dev/null +++ b/examples/SQA/Spoken-Squad-v1/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f40d0fb96afe94b5cbcf3dfbe55d5e92933a7cab1504d1db3f3f5d9cdd7ee1 +size 2091308 diff --git a/examples/SQA/Spoken-Squad-v1/sample_1.wav b/examples/SQA/Spoken-Squad-v1/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..3c93fc036f8b54fd8ffb43c55bb86016ede6a1ea --- /dev/null +++ b/examples/SQA/Spoken-Squad-v1/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2607ab0164db9bd50840415561db20d11fede490862b67fb374a15f808c4ad0 +size 1472300 diff --git a/examples/SQA/Spoken-Squad-v1/sample_2.wav b/examples/SQA/Spoken-Squad-v1/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..96edc703cf7c334dec939e67168c4c2998bb21f6 --- /dev/null +++ b/examples/SQA/Spoken-Squad-v1/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aab7b6ca2c2206cc894962b3b07428670b9c29398ae9fd43822896cf9d84c97 +size 1498412 diff --git a/examples/SQA/Spoken-Squad-v1/state.json b/examples/SQA/Spoken-Squad-v1/state.json new file mode 100644 index 0000000000000000000000000000000000000000..89840561a23f1218e34bb60af320658effae2d2c --- /dev/null +++ b/examples/SQA/Spoken-Squad-v1/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1d8937f6900ae52c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow b/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f97b1428cdda8cbfe5624f6ce9382924016dbb6d --- /dev/null +++ b/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1972041f0666a87c87935e25e022bc93cc1562ac6786b268a105eb5eddbbf04 +size 477560 diff --git a/examples/ST/Covost2-EN-ID-test/dataset_info.json b/examples/ST/Covost2-EN-ID-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ef861e441fd29533e55b11f801c64226c20967ab --- /dev/null +++ b/examples/ST/Covost2-EN-ID-test/dataset_info.json @@ -0,0 +1,136 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-ID-test/sample_0.wav b/examples/ST/Covost2-EN-ID-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..d5e0e0fde0f3a0f727836b4600f298dcce5f5498 Binary files /dev/null and b/examples/ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_1.wav b/examples/ST/Covost2-EN-ID-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..61aa557a0cdcfce43ee5f8372561e10281165065 Binary files /dev/null and b/examples/ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_2.wav b/examples/ST/Covost2-EN-ID-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..11ccbae6576ed4e45dc7474750cefeafcdb868e8 Binary files /dev/null and b/examples/ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/state.json b/examples/ST/Covost2-EN-ID-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..d47a7c80857a3221d45df3297070e256739fb389 --- /dev/null +++ b/examples/ST/Covost2-EN-ID-test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "5a70ead6af05628d", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow b/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0940c9ac544aa2c507f2f2e1be24b9b47e6d8b3a --- /dev/null +++ b/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25de54dea0a7a227d63df2da0cc6f69c7b7993c6f740d691aadf19104b1001c +size 1157064 diff --git a/examples/ST/Covost2-EN-TA-test/dataset_info.json b/examples/ST/Covost2-EN-TA-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ef861e441fd29533e55b11f801c64226c20967ab --- /dev/null +++ b/examples/ST/Covost2-EN-TA-test/dataset_info.json @@ -0,0 +1,136 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-TA-test/sample_0.wav b/examples/ST/Covost2-EN-TA-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a513de5aebf55fa19b2e0d001fe3ce20ca911915 Binary files /dev/null and b/examples/ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_1.wav b/examples/ST/Covost2-EN-TA-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..57b916b8b61a81249911a0350a97f73cc6ec8eec Binary files /dev/null and b/examples/ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_2.wav b/examples/ST/Covost2-EN-TA-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..044e3082961777fbb0dc86467636b15484ada4ed Binary files /dev/null and b/examples/ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/state.json b/examples/ST/Covost2-EN-TA-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..4d4c647d8ae69a5c7dba811c69508b500e4ba97f --- /dev/null +++ b/examples/ST/Covost2-EN-TA-test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "963b050ae0788de8", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow b/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..843155c76ee9f22781a1d61350cee3f6ada0d167 --- /dev/null +++ b/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8044d57100d7ea1e608a3f6b1463e7315c6878b7a29f054e8b8094540c015c61 +size 713568 diff --git a/examples/ST/Covost2-EN-ZH-test/dataset_info.json b/examples/ST/Covost2-EN-ZH-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ef861e441fd29533e55b11f801c64226c20967ab --- /dev/null +++ b/examples/ST/Covost2-EN-ZH-test/dataset_info.json @@ -0,0 +1,136 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-ZH-test/sample_0.wav b/examples/ST/Covost2-EN-ZH-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a8b3faa66b5b5db50c9ce92155ba3f8a032531e0 Binary files /dev/null and b/examples/ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_1.wav b/examples/ST/Covost2-EN-ZH-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..38b0b2d51c6283ae60b36b713dc5196a7c72ce91 Binary files /dev/null and b/examples/ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_2.wav b/examples/ST/Covost2-EN-ZH-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..7cd489f895ab73e1307c1f5773bcd21ae9d41b9a Binary files /dev/null and b/examples/ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/state.json b/examples/ST/Covost2-EN-ZH-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..7495f64755a1258d0257220bff2dbfd7a5c45d03 --- /dev/null +++ b/examples/ST/Covost2-EN-ZH-test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "d72058f978d82043", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow b/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..88b694c63f7958ab56d452705fde59e48840bc60 --- /dev/null +++ b/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd733514850c9692e12d6e395907c6a5551fdebdad579153fd15ff3180c174e +size 361128 diff --git a/examples/ST/Covost2-ID-EN-test/dataset_info.json b/examples/ST/Covost2-ID-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ef861e441fd29533e55b11f801c64226c20967ab --- /dev/null +++ b/examples/ST/Covost2-ID-EN-test/dataset_info.json @@ -0,0 +1,136 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ST/Covost2-ID-EN-test/sample_0.wav b/examples/ST/Covost2-ID-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..047d6f5b5944901143e9688985a729d939c86726 Binary files /dev/null and b/examples/ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_1.wav b/examples/ST/Covost2-ID-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..eb062e116031609643e0a8552418642bfb9ce897 Binary files /dev/null and b/examples/ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_2.wav b/examples/ST/Covost2-ID-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..7359a1c0c3a1bbf05a89d5c9185476f2358525bc Binary files /dev/null and b/examples/ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/state.json b/examples/ST/Covost2-ID-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..844be100a289ab53d963dadc96479f514727d41f --- /dev/null +++ b/examples/ST/Covost2-ID-EN-test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "19ed09e0714aabbd", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow b/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..4cea1d6a1d54cf9bf1f4c9a901836063251c46b9 --- /dev/null +++ b/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d927eda1e39bdb6578d51dceb68fae2ed773a56b4d212b36188d996df0522acf +size 612528 diff --git a/examples/ST/Covost2-TA-EN-test/dataset_info.json b/examples/ST/Covost2-TA-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..cc82e8b905c06d273e25883741d47917c177bd34 --- /dev/null +++ b/examples/ST/Covost2-TA-EN-test/dataset_info.json @@ -0,0 +1,136 @@ +{ + "citation": "", + "description": "", + "features": { + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ST/Covost2-TA-EN-test/sample_0.wav b/examples/ST/Covost2-TA-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..15edf0463a09e091fc40e61fca7310e7bb939466 Binary files /dev/null and b/examples/ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_1.wav b/examples/ST/Covost2-TA-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..72c56721fa7293f283d4b063dbf2422001398dad Binary files /dev/null and b/examples/ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_2.wav b/examples/ST/Covost2-TA-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8beb82265b0970569af53ba0ffd245a32d8c7bdd Binary files /dev/null and b/examples/ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/state.json b/examples/ST/Covost2-TA-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..34a773ffdcc01254866b2bf3a2a70e8666ff271f --- /dev/null +++ b/examples/ST/Covost2-TA-EN-test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "92c0c02259ae086c", + "_format_columns": [ + "answer", + "context", + "instruction", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow b/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..82dc7f2fbc62333ff1e04969d0c095e80a7152df --- /dev/null +++ b/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b1329708efd71d120ca635617f121cc584307c23d21d9fcd53efbdf980f27a +size 368512 diff --git a/examples/ST/Covost2-ZH-EN-test/dataset_info.json b/examples/ST/Covost2-ZH-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ef861e441fd29533e55b11f801c64226c20967ab --- /dev/null +++ b/examples/ST/Covost2-ZH-EN-test/dataset_info.json @@ -0,0 +1,136 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ST/Covost2-ZH-EN-test/sample_0.wav b/examples/ST/Covost2-ZH-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..6772be89c460f498224f8144709671e29394097a Binary files /dev/null and b/examples/ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_1.wav b/examples/ST/Covost2-ZH-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..618d4f4466396098c6b8682092affe1bd70393db Binary files /dev/null and b/examples/ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_2.wav b/examples/ST/Covost2-ZH-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..97ddc1a4d27a100758de33c67691b498c292dd0f Binary files /dev/null and b/examples/ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/state.json b/examples/ST/Covost2-ZH-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a6bf27b40c1391c887b487fcc106fe5eda0ed77a --- /dev/null +++ b/examples/ST/Covost2-ZH-EN-test/state.json @@ -0,0 +1,22 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "24de82e05fd4827e", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file