diff --git a/.gitattributes b/.gitattributes index 0cec2425e9d79f7f0bbf28b0c229a2b6b166c6ac..1d106ec848dda05696abd065e59c1d09fe637cc0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -51,3 +51,4 @@ examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-v1/sample_0.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-v1/sample_1.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-v1/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text diff --git a/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow b/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow index b3b064886d4b037b381a86c97e2f678c1a777048..992c9cbeb901fa7c9abccad60375fd87113507fd 100644 --- a/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow +++ b/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff53d7bcff873fea97f02e971836f24adfbfba9ebdcfc7554423bf65cb110e1d -size 970312 +oid sha256:de5d651b3d63e461daae712fdcb437b28c163eda771ee610433cacc2c359c950 +size 974008 diff --git a/examples/AC/AudioCaps-Test/dataset_info.json b/examples/AC/AudioCaps-Test/dataset_info.json index 2b8cfe680340090d702b3d6b88a8378e4b9449fc..82148686a795bb258e6676260855fb8cf9ef19e4 100644 --- a/examples/AC/AudioCaps-Test/dataset_info.json +++ b/examples/AC/AudioCaps-Test/dataset_info.json @@ -118,6 +118,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/AC/AudioCaps-Test/sample_0.wav b/examples/AC/AudioCaps-Test/sample_0.wav index 4331131a6543acb7d4f6e69a71708518f65bc034..e37f34b9f07a7b6266957992824bab914ce15fe3 100644 Binary files a/examples/AC/AudioCaps-Test/sample_0.wav and b/examples/AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_1.wav b/examples/AC/AudioCaps-Test/sample_1.wav index 2fdb280d6d9a2912c0aad84bc423a3b17ab4acc7..9f16faee9805c2cbf530d36958c99f82952f42f3 100644 Binary files a/examples/AC/AudioCaps-Test/sample_1.wav and b/examples/AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_2.wav b/examples/AC/AudioCaps-Test/sample_2.wav index bd06790c5bba36aa0c69693e84c4bb40cdfbc290..95c13a1a0c7e8fce564fba1e7ecd506797ef83ed 100644 Binary files a/examples/AC/AudioCaps-Test/sample_2.wav and b/examples/AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/AC/AudioCaps-Test/state.json b/examples/AC/AudioCaps-Test/state.json index 65cc6f6b6acacba97b729c23d8639556e02847f6..b334a469898f2333372cff0d2c4e4def008d271e 100644 --- a/examples/AC/AudioCaps-Test/state.json +++ b/examples/AC/AudioCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "0e301916c3676d35", + "_fingerprint": "e654a4081bc1365b", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/AC/WavCaps-Test/data-00000-of-00001.arrow b/examples/AC/WavCaps-Test/data-00000-of-00001.arrow index 8c9b17e83153dcec5bdd46f699aa5f2cee3eeeb8..7dd95d9cb74224287374acb9d891de65763c2a7e 100644 --- a/examples/AC/WavCaps-Test/data-00000-of-00001.arrow +++ b/examples/AC/WavCaps-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c824145183da91da498c0338860b6daeaed94d7c619ec9ae22a041918c61c902 -size 985360 +oid sha256:ca86f9ee00453c956db08be9d77e62a74a9accb5128c3dec0cb91a564a653ed3 +size 529952 diff --git a/examples/AC/WavCaps-Test/dataset_info.json b/examples/AC/WavCaps-Test/dataset_info.json index 0854513dfb2583899f6f4539b14e6b2982daf86b..acf11db0c4cc1869f7763270ddadbfe4c30f73d4 100644 --- a/examples/AC/WavCaps-Test/dataset_info.json +++ b/examples/AC/WavCaps-Test/dataset_info.json @@ -114,6 +114,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/AC/WavCaps-Test/sample_0.wav b/examples/AC/WavCaps-Test/sample_0.wav index a76589f0898c5033ab99b0a15f34564fcd4e0d89..86803de3706292dd68f1a49c6dc5b66662eabae8 100644 Binary files a/examples/AC/WavCaps-Test/sample_0.wav and b/examples/AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/AC/WavCaps-Test/sample_1.wav b/examples/AC/WavCaps-Test/sample_1.wav index 011a74daa10980002a95da355e3534e42f69bf6c..00182dc3b08c94349036bf2f6da3df783e7358dc 100644 Binary files a/examples/AC/WavCaps-Test/sample_1.wav and b/examples/AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/AC/WavCaps-Test/sample_2.wav b/examples/AC/WavCaps-Test/sample_2.wav index d8afaa47b29c26dd49fbc93a95e3d8310a402a11..a9c4f74921215f29b6e4e01b3ec87b98d4c64a04 100644 Binary files a/examples/AC/WavCaps-Test/sample_2.wav and b/examples/AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/AC/WavCaps-Test/state.json b/examples/AC/WavCaps-Test/state.json index 1157a81d2f767520f45c0be2fe9bab4ff33f96b2..18c89274b45b312d6a38f3f6e978483c100b9db1 100644 --- a/examples/AC/WavCaps-Test/state.json +++ b/examples/AC/WavCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1e570096603c2a32", + "_fingerprint": "ce408e4cfa3eec8a", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow b/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow index 9ebb255230c929cad6fcfff2388a8c5fd5cbef50..48f20e6301f3173259f56f7ae9e905e28cfeb57b 100644 --- a/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44587f333a31075993f3cc8832f8946fd59605246769dd8bf86887dcf3bd889c -size 543912 +oid sha256:9b59c848db4f23898202a290a248174e7f05709c7cb32dbcf003afd40bf805ba +size 531272 diff --git a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json index 0ee97efe251f1c21a87ef3c09372d7249c0d657c..61c4c1050499fd3220dbfe6013858b008c8d9810 100644 --- a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json +++ b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json @@ -34,11 +34,11 @@ }, "other_attributes": { "accents": { - "dtype": "null", + "dtype": "string", "_type": "Value" }, "age": { - "dtype": "null", + "dtype": "string", "_type": "Value" }, "client_id": { @@ -50,7 +50,7 @@ "_type": "Value" }, "gender": { - "dtype": "null", + "dtype": "string", "_type": "Value" }, "language": { @@ -146,6 +146,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav index 0037bc557881e74bdcc687e1d28af287a6c22344..98ad00ab81a597db93fb42634a7b173662c5402e 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav index bc3c6afd5fe31546b091231f643b37f05ba11595..2ff5b62f10f2614553c20a23a94c67dd8813f7c7 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav index 4245350f9951353f48a156651b2b0bc3629ae84a..98eff0b7069351279a995d5e33aa12b83a109dd0 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/state.json b/examples/ASR/Common-Voice-15-En-Test/state.json index 0dc2de30ddac4cca90d5224840099629052ffe14..3064aa54130945bc783ab898011d0e84385e02fa 100644 --- a/examples/ASR/Common-Voice-15-En-Test/state.json +++ b/examples/ASR/Common-Voice-15-En-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "30218d56801da2e8", + "_fingerprint": "6342d438049fbc7e", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow b/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow index c99c2a3f84c01c3b2ad7555d79c55dbdb2ed3da2..0be8d8dfc6b42f8ce64868e73d85a76b369dde37 100644 --- a/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33dce5920f131621276ca79ef824fc52d5d5bb043d7672458fdbe037c7fb60fd -size 310572568 +oid sha256:026f3aba629a6ab6600e0cb70e057725aa5719dad0ed5710cac783974d3c7cb5 +size 316326352 diff --git a/examples/ASR/Earnings21-Test/dataset_info.json b/examples/ASR/Earnings21-Test/dataset_info.json index 08b3d046e3630b0d47e267cf866bc1ce79106af7..1bf27e3aaa89f2fa43812252ac2377fab8ae1708 100644 --- a/examples/ASR/Earnings21-Test/dataset_info.json +++ b/examples/ASR/Earnings21-Test/dataset_info.json @@ -110,6 +110,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/Earnings21-Test/sample_0.wav b/examples/ASR/Earnings21-Test/sample_0.wav index 00b589a907fb97c85923495ea5e471267435498a..effe4f4306d0ae30e9088dc341a753da0dac1581 100644 --- a/examples/ASR/Earnings21-Test/sample_0.wav +++ b/examples/ASR/Earnings21-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8e88eccc61c5e8a5e62867c52a8ccfc4e108a5f48459f7df0eb1e9ae7f16d4f -size 139072236 +oid sha256:ad501843c9718b06a1bac99177c5e85cb987c9b29a46f0da0970bced3e7df7a2 +size 88692524 diff --git a/examples/ASR/Earnings21-Test/sample_1.wav b/examples/ASR/Earnings21-Test/sample_1.wav index 41480ae277b97f832773cd9d05fb187bb9e6c0e4..e20907a027c9da25fc6544ec87f07185b23c3567 100644 --- a/examples/ASR/Earnings21-Test/sample_1.wav +++ b/examples/ASR/Earnings21-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcd3ce7847b7fb0f600bb3faeca3cd2a8f4992fd1de17bd831ef6ccfb2623f33 -size 55065644 +oid sha256:bd6ba77731011a6dc02e5854a600a2036713be4c2d71abf63fd6a89b86083c4f +size 178791280 diff --git a/examples/ASR/Earnings21-Test/sample_2.wav b/examples/ASR/Earnings21-Test/sample_2.wav index 17ba2bbd1608c96c62a420bd267cf9270da8f035..858a5bbeb7fe40409af20bc5df13bf17f862748c 100644 --- a/examples/ASR/Earnings21-Test/sample_2.wav +++ b/examples/ASR/Earnings21-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:607725d2a33f206a0f00668f1907f4d997fc7dbe7d1e1f9c82045496412bd8bd -size 115039268 +oid sha256:157daf8de352731745a22fbb4680f6d1b493ddfa31b1f80db4039f474a7d8ac0 +size 47255852 diff --git a/examples/ASR/Earnings21-Test/state.json b/examples/ASR/Earnings21-Test/state.json index 206bda8429e14fa71c1fbe9ddf26f730e0359b97..37a0a357127feb8eb1497306a502edb53a1e4eee 100644 --- a/examples/ASR/Earnings21-Test/state.json +++ b/examples/ASR/Earnings21-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "2e3dea299b387757", + "_fingerprint": "d0ad1703cbc51418", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow b/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow index 01756fb35be56da03615aa94008d40f4567e2403..7277904497f248a6e53391f4c4f6bcb11240187d 100644 --- a/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51a45b6f2b6f1eaefc76f48f4778b5d5d46793e0a0fc846b1c2dd4b639dd173f -size 299513312 +oid sha256:80d673ef090f9c98675b12556bac6f7f17d1594e8c8c4a0ec81d069fb9921473 +size 369181728 diff --git a/examples/ASR/Earnings22-Test/dataset_info.json b/examples/ASR/Earnings22-Test/dataset_info.json index 08b3d046e3630b0d47e267cf866bc1ce79106af7..1bf27e3aaa89f2fa43812252ac2377fab8ae1708 100644 --- a/examples/ASR/Earnings22-Test/dataset_info.json +++ b/examples/ASR/Earnings22-Test/dataset_info.json @@ -110,6 +110,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/Earnings22-Test/sample_0.wav b/examples/ASR/Earnings22-Test/sample_0.wav index d637495fa7239cdf1a837a632f3d1b3b02b7b16a..fef91d911b56614c75cb0e70f6d7010f31af1605 100644 --- a/examples/ASR/Earnings22-Test/sample_0.wav +++ b/examples/ASR/Earnings22-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df99676242890437003f9de1ebe2df3ea1e55365a76eab1b08bbe5ef6c235bf0 -size 39828524 +oid sha256:a9e7958f1ba96ac88655eab070583dadd0e380891c056b373a6dc6e7e631de75 +size 129058604 diff --git a/examples/ASR/Earnings22-Test/sample_1.wav b/examples/ASR/Earnings22-Test/sample_1.wav index 0e5a289da58dd8768e1e3e4ef3858f2c6b5d7e7e..2824afccc6b00699dc847ea4684d455f48d2477c 100644 --- a/examples/ASR/Earnings22-Test/sample_1.wav +++ b/examples/ASR/Earnings22-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f46cd00f65ad5466bbe58fcbaa45dbbdc62921f876890b78da515b3574178691 -size 140347052 +oid sha256:7053d9e3d049cb299afcd1951678daf6706ada86c5edecdd208f0a5796b0c515 +size 121073708 diff --git a/examples/ASR/Earnings22-Test/sample_2.wav b/examples/ASR/Earnings22-Test/sample_2.wav index 45f6f543dc102d5d73cb768a62c2e9e1846ba74c..f243f84f229ec1ff439f641090532b3e34fdcd52 100644 --- a/examples/ASR/Earnings22-Test/sample_2.wav +++ b/examples/ASR/Earnings22-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7144b858f3de344f601031d093c3403199dc04cad49287cf71dd6ea04ca4dd6 -size 118169900 +oid sha256:fd5ca45b90a4997950deda21f13c95dbaf67cdcb23164df6fd7086c319e0d61e +size 117282092 diff --git a/examples/ASR/Earnings22-Test/state.json b/examples/ASR/Earnings22-Test/state.json index 643700cb9278c071e7a0ca46b2760cf18034ad11..ad7966db6aa47d1e1dbff1f49d710fa0e2050484 100644 --- a/examples/ASR/Earnings22-Test/state.json +++ b/examples/ASR/Earnings22-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "c2ddf91e8ccb230c", + "_fingerprint": "fb047ff90ed3a443", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow b/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow index 80e7846cdbade2e98a13a8439818fb634d13fbc3..661df197e82869bb10efdd392f47ba7b53c59e82 100644 --- a/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow +++ b/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:573e5fd3cdcb0c5b27f9958ba68113a455fa426dec336f7635677c3130a265fe -size 693672 +oid sha256:dd97464fe4407bab4efb6550648ebd6b9d26e66198d5667411e67d7144eb0664 +size 545808 diff --git a/examples/ASR/GigaSpeech-Test/dataset_info.json b/examples/ASR/GigaSpeech-Test/dataset_info.json index 7056dd861bf6cf034028f9f01c5153ac3e4ec304..1dd0025578e934e74b979da9e81789eedd9a2f29 100644 --- a/examples/ASR/GigaSpeech-Test/dataset_info.json +++ b/examples/ASR/GigaSpeech-Test/dataset_info.json @@ -138,6 +138,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/GigaSpeech-Test/sample_0.wav b/examples/ASR/GigaSpeech-Test/sample_0.wav index f2afdc748f9c58eb2e6cd9793b8a642471d23c31..073483a0e4604323d0369ff2216b2a5765780823 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_0.wav and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_1.wav b/examples/ASR/GigaSpeech-Test/sample_1.wav index 942b01315d851d0ed1a72ae561e6859c9fbb32fd..ca1b14f3ed3ea483c331f9acbaabde5b03c0b35b 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_1.wav and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_2.wav b/examples/ASR/GigaSpeech-Test/sample_2.wav index 0e61bad88fd0422f1f69e07c1d79da115df77c7e..11094ecca41233498c7b38e51b9b70971f113ff6 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_2.wav and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ diff --git a/examples/ASR/GigaSpeech-Test/state.json b/examples/ASR/GigaSpeech-Test/state.json index e61469ffa3f7414712f1f7fa7fa6525b49e5cac4..dd846b8228bc8c6a5d6d88f7300ff35a6b51e5cb 100644 --- a/examples/ASR/GigaSpeech-Test/state.json +++ b/examples/ASR/GigaSpeech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "0032f92a85e94025", + "_fingerprint": "84a02614da440215", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow index c7bd43926e41e9768d8da89f5bb2a46ff1a302d2..b640107ed85d3d2c5aafa6d26047189b7a4cb37a 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow +++ b/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:136cd0551a26eb0407715350491a49cd3c80cf8440c0c5d6b5079bdf1d81d260 -size 382752 +oid sha256:5f9d1180986a9e64590852464dcfb972170518d45f4fc0189b7fc3621b8e509e +size 485936 diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav index 9f1b0abb84dc9bb30a04f2b08eb0f0be0ec33cf4..ea36e7a02a6771ea96e210107b81ef3a5e5cd791 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav index 2f5b49a926a9509ad34700c9d00df8bbda49fd57..553aa3569726cd67a40baf3f50c7b5e18f32cd74 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav index f36eb9efe06c44ea5cbd63d26257e8d37f0079b7..e4a9780515675dbb05b95cff26fb5aea26fe9aa7 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/state.json b/examples/ASR/IMDA-Part1-ASR-Test/state.json index 6e63ed10109432151f62c147e30d2704343a756d..d8919f5bfc220aed30d07a397179f30478ac3176 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9b9c1437475afa9d", + "_fingerprint": "1514e693988caee7", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow index 7ea3bfcd404232b6ae9582683615a77c2bb3ac25..b2643e070a834f4ded9ce58b67a751d84dd0426b 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow +++ b/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad482831507358784822a617955723251f237ff24e9dfba98265ebd1bebfacf5 -size 347952 +oid sha256:d8adfae25f905c40a6b1ccc95fa8f55826a0e6e8840f7b04e655aab873635941 +size 328864 diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav index fe0422bcb8356543fbde4cc9981779ca712dcc22..1784a0b78a165892ba01586b04974a15dda4eea3 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav index 4f6ec7d97638278d3cf05791b44d2d072ab161ba..1fde75aea28f1557cd2fe3d80434d8b394f116ac 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav index d1e955f3a2f6d402e2661703d4880a4881c088c8..260cd5f87122a500ebf3b014bb2c323cc1200abe 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/state.json b/examples/ASR/IMDA-Part2-ASR-Test/state.json index 2dbed7b83dd6665624b87d3b11c514bc97e22f29..21299bb3dfc3a9f29854e5a39c5e2130dfca6bae 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1b048a2e54d0c002", + "_fingerprint": "8a8e117080f24a8b", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow b/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow index d700f9546d3292dede0d5f946725fee43d739733..bebf6c7d1f97014bba53dd96dde465b78d340792 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow +++ b/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28a1a86f5cd34ee42918c78eef50f95a53953fd352fb0ecc758543025634d2b7 -size 555160 +oid sha256:af079564f35d59eb2cbfd4a1dba6dec7692bd78025e45de8d3915575a60218ca +size 747384 diff --git a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json index b7936842ee323be9e4c2360418e44a417cf0f626..5a91f71cfb2044e6060c8f395ee4b798384d32d8 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json +++ b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json @@ -122,6 +122,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav index 266ab3ddafe3c5d192606c6cdf579238b0150e62..2a103bec37460090a764f280ebcf4791ccd17d4e 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav index 238f6c95cf91c6bff49636351557961b3e06d356..60af90099e82c9115285de4897e38412a9440cac 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav index f127981ac0a35d93e4fcf3607a84858e9ea276d9..d44035f1bacdce219e44056683944fd9cfecf7eb 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/state.json b/examples/ASR/LibriSpeech-Test-Clean/state.json index 1c1c2c843a1f7da02ed60cbeb427026c76972e7d..103a2443a4c9e5355798ee842efb63b6bf286af8 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/state.json +++ b/examples/ASR/LibriSpeech-Test-Clean/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "c56d3af03a1dc565", + "_fingerprint": "cb0a09e53f0cc5db", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow b/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow index 8e4284b54778666bc3ce9cf7c54e31d950dd8383..917ec8972648ec44e133d05c6aad4b4c925defda 100644 --- a/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow +++ b/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f87fbeb401741f3fe2c384cd859b4065c339239cce8d5718ab8232d873f00a75 -size 873440 +oid sha256:4840496b28fd96061a72c3050f3de27f5ddce15a1d3753bb38f4f9850e736e1b +size 535368 diff --git a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json index b7936842ee323be9e4c2360418e44a417cf0f626..5a91f71cfb2044e6060c8f395ee4b798384d32d8 100644 --- a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json +++ b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json @@ -122,6 +122,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav index 9ceae84addeaa1cfd86a5ae164ba3f48c7994c2f..595f7ba137749fd1578688365357e230351f6660 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav index 5facbda4871665edf9aefcd3e58b6056b296c233..948927ee05b269f31a635b2682751b4f0d50a8c5 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav index 237b56f851d3989319b0fe06ce85878ab69c4850..82b4b2afd731d0c2a1501e34c5689cff431955a0 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/state.json b/examples/ASR/LibriSpeech-Test-Other/state.json index cd23da8c954dfb155a1fd333adb7cfbdfcde8eaf..f296f217907538df40fe8adfabb15d59e5f8d326 100644 --- a/examples/ASR/LibriSpeech-Test-Other/state.json +++ b/examples/ASR/LibriSpeech-Test-Other/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "5fc28a37097fe19f", + "_fingerprint": "0ed80d8ca27350ce", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow b/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow index 7a6adf5c59f4187480c64b55f30dcf754db07d1c..9a86fdc702b8baec1bf87cb75988ccb16d96d448 100644 --- a/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9ee24614187cbaab9f8ded6901791a9aaf5bfdda56a358dd8226be5c5e0462a -size 810976 +oid sha256:90fbb333a4baca6b1cf2c7c73dcf4050e1bc12303d8bd28e0993c12869fee3ce +size 655848 diff --git a/examples/ASR/Peoples-Speech-Test/dataset_info.json b/examples/ASR/Peoples-Speech-Test/dataset_info.json index 0a6ab198f1ae81f599b4abcef59dba0043ab934e..a8d496de62c861269b2a5c8bf9826fedc8abf807 100644 --- a/examples/ASR/Peoples-Speech-Test/dataset_info.json +++ b/examples/ASR/Peoples-Speech-Test/dataset_info.json @@ -114,6 +114,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/Peoples-Speech-Test/sample_0.wav b/examples/ASR/Peoples-Speech-Test/sample_0.wav index 560d3df564e6658b9d7040aff205a47737e16dd2..d18ce818679d916f7d285ab41c4eb0b4c64ab80a 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_0.wav and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_1.wav b/examples/ASR/Peoples-Speech-Test/sample_1.wav index 1513c2587700588fda3c13c14f037bb821dfd6df..7b6d8264d363621884f39814ea63380cb64434a4 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_1.wav and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_2.wav b/examples/ASR/Peoples-Speech-Test/sample_2.wav index 8ab7248ab6278f1ad098d67fee87fec1a934b0bc..ec498dbdb1c5d0bac49a03779da1f995bd621c07 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_2.wav and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/state.json b/examples/ASR/Peoples-Speech-Test/state.json index 37d3ef70f3164ef13c8bd4d576e7d8b4eb9a4511..41de0c42892f63c7ec1efb30e3d62a0be9f877a8 100644 --- a/examples/ASR/Peoples-Speech-Test/state.json +++ b/examples/ASR/Peoples-Speech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "6203edc47e9a3c56", + "_fingerprint": "bf71989dac1baa0c", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow b/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow index 9c0fb1b137104b7fedf1980471505c2ae8b630ff..8a623f9166d791bd3048ebb40e6cf8ca58263d6f 100644 --- a/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Tedlium3-Longform-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9264049c1cb1e16e464a0ee978e5386733025f4fd2f135eea86def1600d3826c -size 101452440 +oid sha256:1d79b07a1c0a2eb8a69abbad7bbc87b0c8c5367f05bd7589126e6276455623b4 +size 109998912 diff --git a/examples/ASR/Tedlium3-Longform-Test/dataset_info.json b/examples/ASR/Tedlium3-Longform-Test/dataset_info.json index ce0eff747512d4783e49763b36beac50f103dce5..cf9965f85a972806830fe14f38cc250366f94118 100644 --- a/examples/ASR/Tedlium3-Longform-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Longform-Test/dataset_info.json @@ -122,6 +122,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/Tedlium3-Longform-Test/sample_0.wav b/examples/ASR/Tedlium3-Longform-Test/sample_0.wav index 8132b0bb9c72b9c26882c19cdb542c31580e3271..458550d1f8ba2439a49bbdd3acce6d28a8050798 100644 --- a/examples/ASR/Tedlium3-Longform-Test/sample_0.wav +++ b/examples/ASR/Tedlium3-Longform-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e4219fea24ab3ffdc256b7f6c5f0d0ce8dcd9eb0f80cf15a9d1b3afa6b6e279 -size 26867124 +oid sha256:df373978edb883e7c38cdb99f293ab770f71a63bc44214185751e229d53edc4c +size 31435404 diff --git a/examples/ASR/Tedlium3-Longform-Test/sample_1.wav b/examples/ASR/Tedlium3-Longform-Test/sample_1.wav index 17eccb9b542ec47bf29066074f7435f6b06f42cd..8132b0bb9c72b9c26882c19cdb542c31580e3271 100644 --- a/examples/ASR/Tedlium3-Longform-Test/sample_1.wav +++ b/examples/ASR/Tedlium3-Longform-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0361b7dc4a9437a698a1d28fb3e73718810a6d685f7932f2a04997ad0475b182 -size 36730348 +oid sha256:2e4219fea24ab3ffdc256b7f6c5f0d0ce8dcd9eb0f80cf15a9d1b3afa6b6e279 +size 26867124 diff --git a/examples/ASR/Tedlium3-Longform-Test/sample_2.wav b/examples/ASR/Tedlium3-Longform-Test/sample_2.wav index 2eff19f1f7ce5c124d20e874f244975410f7113c..41dbac609aefb991db732192675b67164a3106d5 100644 --- a/examples/ASR/Tedlium3-Longform-Test/sample_2.wav +++ b/examples/ASR/Tedlium3-Longform-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32c1ab4c6363f3db0f5fca8bdc5f2eb780522a08da3cd00bf6a4b1ea7e3cc0f8 -size 37362604 +oid sha256:ac85b08c75fad06d968942b517e53495961ff861c6e794b576ecce3b406bcbf8 +size 51095404 diff --git a/examples/ASR/Tedlium3-Longform-Test/state.json b/examples/ASR/Tedlium3-Longform-Test/state.json index a1041e8ced6bf9491bb3d91d8d0f8d5814c98494..acb7115eb5d31d5d7a8e9c066448ea7e12bed918 100644 --- a/examples/ASR/Tedlium3-Longform-Test/state.json +++ b/examples/ASR/Tedlium3-Longform-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f89ae31db7413bf4", + "_fingerprint": "16ff45685ee4694b", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow b/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow index 90b3b6d6bc83f53d0b78e62028dbffda36c2cec3..3ec32001db45f8e69b9ffddc9ce6339dd5f5590b 100644 --- a/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6501f1e1b94e6fa24eddb4b65dd97ae10f4c3df45db946f3bc3b927fafbca908 -size 633448 +oid sha256:e2b4593b23a313e4f640e22c9e0daf6de43e3fa4ef510576c1d596a279b81996 +size 1025976 diff --git a/examples/ASR/Tedlium3-Test/dataset_info.json b/examples/ASR/Tedlium3-Test/dataset_info.json index ce0eff747512d4783e49763b36beac50f103dce5..cf9965f85a972806830fe14f38cc250366f94118 100644 --- a/examples/ASR/Tedlium3-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Test/dataset_info.json @@ -122,6 +122,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ASR/Tedlium3-Test/sample_0.wav b/examples/ASR/Tedlium3-Test/sample_0.wav index c7bd8c3b68ed10585921e11bee7d24a34339f78c..94362f231d21545289d87680f8580d7dd2de71e8 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_0.wav and b/examples/ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_1.wav b/examples/ASR/Tedlium3-Test/sample_1.wav index 3b684a181c9fd4ac1d95c2fc7988379a26f677cb..8a73e7a2a9ec7b50f172450c75d9d16a2b7c0987 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_1.wav and b/examples/ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_2.wav b/examples/ASR/Tedlium3-Test/sample_2.wav index b1ab681404201295b8fb1355c1c29d92147a750e..79460a0b295f5b0ee22515fcbb7fcdee5ce83816 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_2.wav and b/examples/ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/ASR/Tedlium3-Test/state.json b/examples/ASR/Tedlium3-Test/state.json index c0eec8964db7c884c56d55d6cae98406a6c52de4..5db8543827fbeb809394b23b9163137b6d9f7b71 100644 --- a/examples/ASR/Tedlium3-Test/state.json +++ b/examples/ASR/Tedlium3-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fde82500f706458b", + "_fingerprint": "33116bb8d5c8b7bf", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow b/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow index c278ed60e5e4eacd0623c14b19f17e3fb6dc9881..2a55ca00d856a62b9141a17d56c55bc2b0d36d2f 100644 --- a/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow +++ b/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29cf1b882a819ad04cc16d81b42dd8de6ec402b00537887845c33ce283385eb3 -size 458128 +oid sha256:4d6d91fed0e61ca2a837992ed3d410f35eb93bb89c8aafca34bd37f88f0e04a7 +size 378864 diff --git a/examples/SI/ALPACA-Audio-Test/dataset_info.json b/examples/SI/ALPACA-Audio-Test/dataset_info.json index 7dd927db4ab336b38a99b686aced46f2e4d9a5cb..882789dac9ac95ae77467581f55b4f296746e7a9 100644 --- a/examples/SI/ALPACA-Audio-Test/dataset_info.json +++ b/examples/SI/ALPACA-Audio-Test/dataset_info.json @@ -126,6 +126,28 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/SI/ALPACA-Audio-Test/sample_0.wav b/examples/SI/ALPACA-Audio-Test/sample_0.wav index 3d516af829afc1afcea07064b2aaaec734175e61..9d59769e693ba8186a468c920bf33aed6415b9a0 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_0.wav and b/examples/SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_1.wav b/examples/SI/ALPACA-Audio-Test/sample_1.wav index 3707bfb5a929acff874947ad6ea76b902ddbc2bf..9fa77dc3a558295c617e6ba5f9c77d4a744f18fe 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_1.wav and b/examples/SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_2.wav b/examples/SI/ALPACA-Audio-Test/sample_2.wav index 70248c9fd2ff3b5792099536311d05bed53fba47..e7446eb105a62c5223dc799407a78a303ffa3273 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_2.wav and b/examples/SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/state.json b/examples/SI/ALPACA-Audio-Test/state.json index cba82888d784fef2f53a04b162e712eb4813e7dd..3b1309df321667faea7ee972e3d92df95efd575f 100644 --- a/examples/SI/ALPACA-Audio-Test/state.json +++ b/examples/SI/ALPACA-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b48e6d66699d09cc", + "_fingerprint": "c6a29ee9e25361a7", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow b/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow index 35154d6b77f4255cc194837e18b4f701795e3c77..1c3f566dde72b635c03eead9812cf6dc53b34834 100644 --- a/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow +++ b/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7748a3da55014d5c3b955a7eeb7c03a43a6cc8735faba6e4c9c756b2940cdc5a -size 452872 +oid sha256:5c05a411b9924e8179a9b3fb044d5fc2ac956d7c7bb41079d39f01cecea968bb +size 448912 diff --git a/examples/SI/OpenHermes-Audio-Test/sample_0.wav b/examples/SI/OpenHermes-Audio-Test/sample_0.wav index 3a11a8017ed68f552b088939360efbd713fb23d3..58c30ae68669cf5651f7dcd57e28e73b9347f677 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_0.wav and b/examples/SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_1.wav b/examples/SI/OpenHermes-Audio-Test/sample_1.wav index c5bb85768cc4a7c6f7206fc9c56412a614d08d9a..58ffc5f5c796a545f6ec10417f286ad13acc64cd 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_1.wav and b/examples/SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_2.wav b/examples/SI/OpenHermes-Audio-Test/sample_2.wav index 63ae3d47c286000d5b4180e65a0aa12437dbe7d6..7d013e06ccb1a7b66fa1371cd7dd25a0067c6424 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_2.wav and b/examples/SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/state.json b/examples/SI/OpenHermes-Audio-Test/state.json index 7dd94d080c16d3c9654a1ba57c8142e0b9c1380d..83993faa18ef244ce7cd82d1a262742737e1b751 100644 --- a/examples/SI/OpenHermes-Audio-Test/state.json +++ b/examples/SI/OpenHermes-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "2edb418fca8ac348", + "_fingerprint": "44d9e5a3612e0df7", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow b/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow index 0b86442465f821173a22ba9723daf4dc44f8ba2b..3cd61b5dd0736224a5e706da855a34aa3b556185 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow +++ b/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28bc46da28428d3b73040f668cbc85db1536ca8e44c8e158c671963aad4cb0e4 -size 1918152 +oid sha256:04553c1f7978fcba078cdd16f3e8d5bedda25c17b9df6e092b02f3cd2ad656e1 +size 1576640 diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json index c3526b447e28b1c62fece39889011d58962d741f..81903da2dc3bf5b849827427d5a61f932fe485bb 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -45,78 +45,6 @@ "dtype": "string", "_type": "Value" } - }, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } } }, "homepage": "", diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav index 6923b4c01dd221f14c21f85df1e8165eb5b59bbf..b383286458adcd302192c1a8840bf8adf43b1c41 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav index 750f1b8414060167651ac33128408b510b1545ca..fcdf3095b8c9b36fd8d1140fabc9c22e506ce290 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav index 0a7aa93de8bade69d708bbf36a8954843b02c2cb..3674d26ff67ffee9fbf6cd64bd00af74906a37d0 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/state.json b/examples/SQA/CN-College-Listen-MCQ-Test/state.json index 75ad396b15b37427ae3dc61c702735f6de828994..c6cc629f7c25521767ac482dbd1e71307ff9c443 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/state.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/state.json @@ -4,16 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "a9d1aad6ff31612e", + "_fingerprint": "efbbc5b180ee96e0", "_format_columns": [ "context", "instruction", "answer", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "whisper_large_v3_with_llama_3_8b_instruct", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow b/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow index 0b25a112d290d54981e0b560352694831bad5fe9..12eb2e58f804d16c2db9a6693e90245cc68639b6 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow +++ b/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0fecdfef3a25aa2495ab3197b2fe30fa77cfb4c6714b35815f433e52d176d8d9 -size 2217584 +oid sha256:4ae7de4369c90c75f710be601d59b74db79e17bbfea1d8967be99bfae9ca7e16 +size 3653240 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json index c4eadac83d7d01b0964e29d70f224e2d0a3a9246..a16938258fabe5c46b36019db75cd53e5132c21f 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -59,78 +59,6 @@ "dtype": "string", "_type": "Value" } - }, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } } }, "homepage": "", diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav index 3af6793b18199045cab70e74c5ec3ce96d7f16dc..66cfc3e59a5986898075fed944561264db28f426 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav +++ b/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:899682e40d59cbf79cdd27fe1ff7a108b6349159defaa8bbea9c93cf6f2fedc0 -size 1278380 +oid sha256:12ba2e360c480652e934910021d2692726e23b74e9358d81adc15c312fea4e94 +size 2164844 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav index 36cb70eb458d7650de9d8fce1d8b46dac6ca53f2..8f6275ca50ba72f0800b5392777d63d5d1655a2f 100644 Binary files a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav index 845c1cc2376e7b81c3698c53151bd09c119b79a5..e0cb946282c8286953d6fee2edfa6fff6ed9681d 100644 Binary files a/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/state.json b/examples/SQA/DREAM-TTS-MCQ-Test/state.json index 70faed9032446a81505aa07f10cc97f424fd3026..9839f8b445e8be16c0eacbf29b36a184b7e09cf0 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/state.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/state.json @@ -4,16 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e5f8ea158b4a9ca3", + "_fingerprint": "1414881d04766772", "_format_columns": [ "answer", "context", "instruction", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "whisper_large_v3_with_llama_3_8b_instruct", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow b/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow index 59e07ca9393b991fff31f4d506cc0aecb9ad826e..146cb60d081878a2ee58eebb443ee065af3919dc 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow +++ b/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d5be42e78f6f7b3e6a0e3965bbad83b99ad9dcfbbb60edeba20c2e126afa9c8 -size 3873408 +oid sha256:d61889178f0e10699a3d75a8e1dfc815281a707753564bfd1810269d5c4b6c30 +size 2434880 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav index bb649d8f669f8cff91e61cfadde01620e5fc6fa9..5067df6a584ba334d2a6d74e2ed3f10e9964f98a 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bf01f3c82a571576a2c1e4968142684d1b43a81187fb190d1105a907bb2e90a -size 1400162 +oid sha256:12b924f1d6117f412cfbc47243ceebb128b235432596171238c39f27575367d6 +size 716894 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav index 5c04ae7e9bc5ab1b45a159b3b5928e31db2f2198..f0b054dabe0e86ff8c6419e192e5c1c0ad3bbcc7 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89b32f4f4294b755fd1d61e0f450f7e11a566fde6d8011d524476569c8ca65d8 -size 1524614 +oid sha256:f7862a175d879b81ed5d821a1fb318067f7fbd8bf6a17946433d9399d4f5fe8e +size 1051126 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav index 19bef4b0a13042e08e5c59f0bdde09868c33bff1..9911e47fed2232a4c5cd141221b8e8bc98ed66cf 100644 Binary files a/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav and b/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav differ diff --git a/examples/SQA/Public-SG-Speech-QA-Test/state.json b/examples/SQA/Public-SG-Speech-QA-Test/state.json index 579a4a84b69f43df3d260b1e814859aa751a0126..cb60981a9b1ca32983da76e04178d039a7507568 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/state.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "77292393d596f5fa", + "_fingerprint": "794fdda22fd93c7f", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow b/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow index 1b56d95162403fbb91b0c2fac05154f6167025a3..d1083315e8bf3c047ec2a17c7c4a49d6dd989e30 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow +++ b/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfc3fbcd2d7a672e863657eadb4c19c1f83bf2bd6964e41622311b1da913a3d7 -size 5589160 +oid sha256:692c3bd6c8e2510b1e855bf8b8045087ca652a2b15586b2258ce645e25e364da +size 5620424 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json index d0f327fa13416dff7d7cc45e7f9652d37ea2b296..6c681f17c89b019564bbd2d0509865a05ce4db3a 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json @@ -155,6 +155,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav index 1e8d727fcd2c976bd37a9cab862053a06814e000..3187166b9ccac7a8793e5f27d3de4feaa3fe24bd 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f611c50ffa4ec236f7652646ef655e6511768b2ed0d1cf0a0e87a89ea05ca46e +oid sha256:76ad2ff1b82c3abf752a38d5ef6db2ffe35d3b7ce103501d5e17dc190082da14 size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav index faa36bf23e79182c248128fb398f6bb85e5ba4ea..76f2283a80c54683a9dda0a770ca16600443b35d 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f753fe113aabdcee26defc81a50f4dc985086c81c44abb2dbbca15c969faa999 +oid sha256:83dd1c9f1187291ab43fc895a0a1ff45f7c9f975e5262d55306c45a91bf393cd size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav index 3187166b9ccac7a8793e5f27d3de4feaa3fe24bd..b01e112fee9b85aabf49a742938a0477b7328a95 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76ad2ff1b82c3abf752a38d5ef6db2ffe35d3b7ce103501d5e17dc190082da14 +oid sha256:e6d709a54d78815cd121c345f7f0959d8ad970e7aae366c248fa1e71a343b95c size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/state.json b/examples/SQA/SLUE-P2-SQA5-Test/state.json index 5c88181b585af5561ec3cfbd92c9d80a9e2f1f57..2c3aad4c97f82ceb8147cd280eaa55e3d858b29d 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/state.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "35d15f00aac76485", + "_fingerprint": "8a250cf20de7599a", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow b/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow index 24538529c4259feb292c0ccd989e952ae1cf58f1..2cac1b1dd35d9b07cdf02a2656d20e936864954e 100644 --- a/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow +++ b/examples/SQA/Spoken-Squad-v1/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75afcf7693963ab137b8504ca60ce1894fceb08fd8d98b921f43c00fe5995820 -size 5069224 +oid sha256:84c5b664e578e4227087b1971ccb05c6529216004997ae228ba128dc405dbd64 +size 6685072 diff --git a/examples/SQA/Spoken-Squad-v1/sample_0.wav b/examples/SQA/Spoken-Squad-v1/sample_0.wav index 182c444046b5c55624be45dec892f8e09cb459e2..8a2de266f471f3befc4a1a527fdade06a37f6831 100644 --- a/examples/SQA/Spoken-Squad-v1/sample_0.wav +++ b/examples/SQA/Spoken-Squad-v1/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4f40d0fb96afe94b5cbcf3dfbe55d5e92933a7cab1504d1db3f3f5d9cdd7ee1 -size 2091308 +oid sha256:4605c9f5d91377974dc6899e8aa719d14ccdd17f02c11f49577086fd2c8331eb +size 1754924 diff --git a/examples/SQA/Spoken-Squad-v1/sample_1.wav b/examples/SQA/Spoken-Squad-v1/sample_1.wav index 3c93fc036f8b54fd8ffb43c55bb86016ede6a1ea..88cdd548f41f9d25fa36229ee6d4330e2e1f751a 100644 --- a/examples/SQA/Spoken-Squad-v1/sample_1.wav +++ b/examples/SQA/Spoken-Squad-v1/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2607ab0164db9bd50840415561db20d11fede490862b67fb374a15f808c4ad0 -size 1472300 +oid sha256:a5e3572a6087d5b07f11a2e4fbb655d8b790305da234a5e498f7ba6f528fbb36 +size 2813228 diff --git a/examples/SQA/Spoken-Squad-v1/sample_2.wav b/examples/SQA/Spoken-Squad-v1/sample_2.wav index 96edc703cf7c334dec939e67168c4c2998bb21f6..a3888c02ae4e87f184fcebd028eb9a0d2da684b2 100644 --- a/examples/SQA/Spoken-Squad-v1/sample_2.wav +++ b/examples/SQA/Spoken-Squad-v1/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1aab7b6ca2c2206cc894962b3b07428670b9c29398ae9fd43822896cf9d84c97 -size 1498412 +oid sha256:05875dabf62b82922df3c7d374b5d7ef63daf6c6433b6e01048175851f3f3a09 +size 2108204 diff --git a/examples/SQA/Spoken-Squad-v1/state.json b/examples/SQA/Spoken-Squad-v1/state.json index 89840561a23f1218e34bb60af320658effae2d2c..64ffce20f87758120e69473703b377a002af8178 100644 --- a/examples/SQA/Spoken-Squad-v1/state.json +++ b/examples/SQA/Spoken-Squad-v1/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1d8937f6900ae52c", + "_fingerprint": "d63972b6ec648239", "_format_columns": [ "context", "instruction",