Upload folder using huggingface_hub
- app/content.py +41 -41
- app/draw_diagram.py +1 -1
app/content.py
CHANGED
@@ -1,70 +1,70 @@
-asr_datsets = {'LibriSpeech-Test-Clean': '
-               'LibriSpeech-Test-Other': '
-               'Common-Voice-15-En-Test': '
-               'Peoples-Speech-Test': '
-               'GigaSpeech-Test': '
-               'Earnings21-Test': '
-               'Earnings22-Test': '
-               'Tedlium3-Test': '
-               'Tedlium3-Long-form-Test': '
-               'IMDA-Part1-ASR-Test': '
-               'IMDA-Part2-ASR-Test': '
+asr_datsets = {'LibriSpeech-Test-Clean': 'A clean, high-quality test set of the LibriSpeech dataset, used for ASR testing.',
+               'LibriSpeech-Test-Other': 'A more challenging, noisier test set of the LibriSpeech dataset for ASR testing.',
+               'Common-Voice-15-En-Test': 'Test set from the Common Voice project, a crowd-sourced, multilingual speech dataset.',
+               'Peoples-Speech-Test': 'A large-scale, open-source speech recognition dataset with diverse accents and domains.',
+               'GigaSpeech-Test': 'A large-scale ASR dataset with diverse audio sources such as podcasts and interviews.',
+               'Earnings21-Test': 'ASR test dataset focused on earnings calls from 2021, with professional speech and financial jargon.',
+               'Earnings22-Test': 'Similar to Earnings21, but covering earnings calls from 2022.',
+               'Tedlium3-Test': 'A test set derived from TED talks, covering diverse speakers and topics.',
+               'Tedlium3-Long-form-Test': 'A long-form version of the TED-LIUM test set, containing extended audio samples. It poses challenges for existing fusion methods in handling long audio, but provides a benchmark for future development.',
+               'IMDA-Part1-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 1.',
+               'IMDA-Part2-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 2.'
 }
 
-sqa_datasets = {'CN-College-Listen-MCQ-Test': '
-                'DREAM-TTS-MCQ-Test': '
-                'SLUE-P2-SQA5-Test': '
-                'Public-SG-Speech-QA-Test': '
-                'Spoken-Squad-v1': '
+sqa_datasets = {'CN-College-Listen-MCQ-Test': 'Chinese College English Listening Test, with multiple-choice questions.',
+                'DREAM-TTS-MCQ-Test': 'DREAM dataset for spoken question answering, derived from textual data and synthesized speech.',
+                'SLUE-P2-SQA5-Test': 'Spoken Language Understanding Evaluation (SLUE) dataset, Phase 2, focused on QA tasks.',
+                'Public-SG-Speech-QA-Test': 'Public dataset for speech-based question answering, gathered from Singapore.',
+                'Spoken-Squad-v1': 'Spoken SQuAD dataset, based on the textual SQuAD dataset, converted into audio.'
 }
 
-si_datasets = {'OpenHermes-Audio-Test': '
-               'ALPACA-Audio-Test': '
+si_datasets = {'OpenHermes-Audio-Test': 'Test set for spoken instructions, synthesized from the OpenHermes dataset.',
+               'ALPACA-Audio-Test': 'Spoken version of the ALPACA dataset, used for evaluating instruction following in audio.'
 }
 
 ac_datasets = {
-    'WavCaps-Test': '
-    'AudioCaps-Test': '
+    'WavCaps-Test': 'WavCaps is a dataset for testing audio captioning, where models generate textual descriptions of audio clips.',
+    'AudioCaps-Test': 'AudioCaps dataset, used for generating captions from general audio events.'
 }
 
 asqa_datasets = {
-    'Clotho-AQA-Test': '
-    'WavCaps-QA-Test': '
-    'AudioCaps-QA-Test': '
+    'Clotho-AQA-Test': 'Clotho dataset adapted for audio-based question answering, containing audio clips and questions.',
+    'WavCaps-QA-Test': 'Question-answering test dataset derived from WavCaps, focusing on audio content.',
+    'AudioCaps-QA-Test': 'AudioCaps adapted for question-answering tasks, using audio events as input for Q&A.'
 }
 
 er_datasets = {
-    'IEMOCAP-Emotion-Test': '
-    'MELD-Sentiment-Test': '
-    'MELD-Emotion-Test': '
+    'IEMOCAP-Emotion-Test': 'Emotion recognition test data from the IEMOCAP dataset, focusing on identifying emotions in speech.',
+    'MELD-Sentiment-Test': 'Sentiment recognition from speech using the MELD dataset, classifying positive, negative, or neutral sentiment.',
+    'MELD-Emotion-Test': 'Emotion classification in speech using MELD, detecting specific emotions such as happiness and anger.'
 }
 
 ar_datsets = {
-    'VoxCeleb-Accent-Test': '
+    'VoxCeleb-Accent-Test': 'Test dataset for accent recognition, based on VoxCeleb, a large speaker identification dataset.'
 }
 
 gr_datasets = {
-    'VoxCeleb-Gender-Test': '
-    'IEMOCAP-Gender-Test': '
+    'VoxCeleb-Gender-Test': 'Test dataset for gender classification, also derived from VoxCeleb.',
+    'IEMOCAP-Gender-Test': 'Gender classification based on the IEMOCAP dataset.'
 }
 
 spt_datasets = {
-    'Covost2-EN-ID-test': '
-    'Covost2-EN-ZH-test': '
-    'Covost2-EN-TA-test': '
-    'Covost2-ID-EN-test': '
-    'Covost2-ZH-EN-test': '
-    'Covost2-TA-EN-test': '
+    'Covost2-EN-ID-test': 'CoVoST 2 dataset for speech translation from English to Indonesian.',
+    'Covost2-EN-ZH-test': 'CoVoST 2 dataset for speech translation from English to Chinese.',
+    'Covost2-EN-TA-test': 'CoVoST 2 dataset for speech translation from English to Tamil.',
+    'Covost2-ID-EN-test': 'CoVoST 2 dataset for speech translation from Indonesian to English.',
+    'Covost2-ZH-EN-test': 'CoVoST 2 dataset for speech translation from Chinese to English.',
+    'Covost2-TA-EN-test': 'CoVoST 2 dataset for speech translation from Tamil to English.'
 }
 
 cnasr_datasets = {
-    'Aishell-ASR-ZH-Test': '
+    'Aishell-ASR-ZH-Test': 'ASR test dataset for Mandarin Chinese, based on the AISHELL dataset.'
 }
 
 metrics = {
-    'wer': '
-    'llama3_70b_judge_binary': '
-    'llama3_70b_judge': '
-    'meteor': '
-    'bleu': '
+    'wer': 'Word Error Rate (WER), a common metric for ASR evaluation (lower is better).',
+    'llama3_70b_judge_binary': 'Binary evaluation using the LLAMA3-70B model, for tasks requiring a binary outcome (0-100, scaled from a 0-1 judge score).',
+    'llama3_70b_judge': 'General evaluation using the LLAMA3-70B model, typically scoring subjective judgments (0-100, scaled from a 0-5 judge score).',
+    'meteor': 'METEOR, a metric for evaluating text generation, often used in translation or summarization tasks (sensitive to output length).',
+    'bleu': 'BLEU (Bilingual Evaluation Understudy), another text-generation metric commonly used in machine translation (sensitive to output length).'
 }
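Taken together, content.py now maps every dataset and metric key to a human-readable description, grouped by task. A minimal sketch of how a UI layer might consume these dicts (the ALL_DATASETS merge and the describe() helper are our illustration, not code from this commit; it assumes app/ is importable as a package):

# Hypothetical lookup helper over the dicts defined in app/content.py.
from app.content import (asr_datsets, sqa_datasets, si_datasets, ac_datasets,
                         asqa_datasets, er_datasets, ar_datsets, gr_datasets,
                         spt_datasets, cnasr_datasets, metrics)

# Merge the per-task groups once so lookups stay O(1).
ALL_DATASETS = {}
for group in (asr_datsets, sqa_datasets, si_datasets, ac_datasets,
              asqa_datasets, er_datasets, ar_datsets, gr_datasets,
              spt_datasets, cnasr_datasets):
    ALL_DATASETS.update(group)

def describe(name: str) -> str:
    """Return the display description for a dataset or metric key."""
    if name in ALL_DATASETS:
        return ALL_DATASETS[name]
    if name in metrics:
        return metrics[name]
    return 'No description available.'

print(describe('Tedlium3-Long-form-Test'))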
app/draw_diagram.py
CHANGED
@@ -10,7 +10,7 @@ import pandas as pd
 # huggingface_image = Image.open('style/huggingface.jpg')
 
 # other info
-path = "./additional_info/Leaderboard-Rename.xlsx"
+path = "./AudioBench-Leaderboard/additional_info/Leaderboard-Rename.xlsx"
 info_df = pd.read_excel(path)
 
 # def nav_to(value):
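Note that this fix hard-codes the Space's checkout folder (AudioBench-Leaderboard/) into a relative path, so it only holds while the app is launched from that folder's parent directory. A working-directory-independent sketch, assuming the standard layout with additional_info/ next to app/ (the REPO_ROOT name is ours, not the repo's):

# Resolve the spreadsheet relative to this file instead of the CWD.
from pathlib import Path
import pandas as pd

# draw_diagram.py lives in app/, so the repo root is one level up.
REPO_ROOT = Path(__file__).resolve().parent.parent
path = REPO_ROOT / "additional_info" / "Leaderboard-Rename.xlsx"
info_df = pd.read_excel(path)

With this approach the load works whether the Space is started from the repo root or from its parent, at the cost of assuming the file layout stays fixed.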