zhuohan-7 committed on
Commit 1f3a4ec
1 Parent(s): 29b917c

Upload folder using huggingface_hub

Files changed (2)
  1. app/content.py +41 -41
  2. app/draw_diagram.py +1 -1
app/content.py CHANGED
@@ -1,70 +1,70 @@
-asr_datsets = {'LibriSpeech-Test-Clean': 'aa',
-               'LibriSpeech-Test-Other': 'bb',
-               'Common-Voice-15-En-Test': 'cc',
-               'Peoples-Speech-Test': 'dd',
-               'GigaSpeech-Test': 'ee',
-               'Earnings21-Test': 'ff',
-               'Earnings22-Test': 'gg',
-               'Tedlium3-Test': 'hh',
-               'Tedlium3-Long-form-Test': 'ii',
-               'IMDA-Part1-ASR-Test': 'jj',
-               'IMDA-Part2-ASR-Test': 'kk'
+asr_datsets = {'LibriSpeech-Test-Clean': 'A clean, high-quality test set from the LibriSpeech dataset, used for ASR testing.',
+               'LibriSpeech-Test-Other': 'A more challenging, noisier test set from the LibriSpeech dataset for ASR testing.',
+               'Common-Voice-15-En-Test': 'Test set from the Common Voice project, a crowd-sourced, multilingual speech dataset.',
+               'Peoples-Speech-Test': 'A large-scale, open-source speech recognition dataset with diverse accents and domains.',
+               'GigaSpeech-Test': 'A large-scale ASR dataset with diverse audio sources such as podcasts and interviews.',
+               'Earnings21-Test': 'ASR test dataset focused on earnings calls from 2021, with professional speech and financial jargon.',
+               'Earnings22-Test': 'Similar to Earnings21, but covering earnings calls from 2022.',
+               'Tedlium3-Test': 'A test set derived from TED talks, covering diverse speakers and topics.',
+               'Tedlium3-Long-form-Test': 'A long-form version of the TED-LIUM test set with extended audio samples, which challenges existing fusion methods on long audio and provides a benchmark for future development.',
+               'IMDA-Part1-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 1.',
+               'IMDA-Part2-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 2.'
 }
 
-sqa_datasets = {'CN-College-Listen-MCQ-Test': 'aa',
-                'DREAM-TTS-MCQ-Test': 'bb',
-                'SLUE-P2-SQA5-Test': 'cc',
-                'Public-SG-Speech-QA-Test': 'dd',
-                'Spoken-Squad-v1': 'ee'
+sqa_datasets = {'CN-College-Listen-MCQ-Test': 'Chinese College English Listening Test, with multiple-choice questions.',
+                'DREAM-TTS-MCQ-Test': 'DREAM dataset for spoken question answering, derived from textual data and synthesized speech.',
+                'SLUE-P2-SQA5-Test': 'Spoken Language Understanding Evaluation (SLUE) dataset, Phase 2, focused on QA tasks.',
+                'Public-SG-Speech-QA-Test': 'Public dataset for speech-based question answering, gathered from Singapore.',
+                'Spoken-Squad-v1': 'Spoken SQuAD dataset, based on the textual SQuAD dataset, converted into audio.'
 }
 
-si_datasets = {'OpenHermes-Audio-Test': 'aa',
-               'ALPACA-Audio-Test': 'bb'
+si_datasets = {'OpenHermes-Audio-Test': 'Test set for spoken instructions, synthesized from the OpenHermes dataset.',
+               'ALPACA-Audio-Test': 'Spoken version of the ALPACA dataset, used for evaluating instruction following in audio.'
 }
 
 ac_datasets = {
-    'WavCaps-Test': 'aa',
-    'AudioCaps-Test': 'bb'
+    'WavCaps-Test': 'WavCaps is a dataset for testing audio captioning, where models generate textual descriptions of audio clips.',
+    'AudioCaps-Test': 'AudioCaps dataset, used for generating captions from general audio events.'
 }
 
 asqa_datasets = {
-    'Clotho-AQA-Test': 'aa',
-    'WavCaps-QA-Test': 'bb',
-    'AudioCaps-QA-Test': 'cc'
+    'Clotho-AQA-Test': 'Clotho dataset adapted for audio-based question answering, containing audio clips and questions.',
+    'WavCaps-QA-Test': 'Question-answering test dataset derived from WavCaps, focusing on audio content.',
+    'AudioCaps-QA-Test': 'AudioCaps adapted for question-answering tasks, using audio events as input for Q&A.'
 }
 
 er_datasets = {
-    'IEMOCAP-Emotion-Test': 'aa',
-    'MELD-Sentiment-Test': 'bb',
-    'MELD-Emotion-Test': 'cc'
+    'IEMOCAP-Emotion-Test': 'Emotion recognition test data from the IEMOCAP dataset, focusing on identifying emotions in speech.',
+    'MELD-Sentiment-Test': 'Sentiment recognition from speech using the MELD dataset, classifying positive, negative, or neutral sentiment.',
+    'MELD-Emotion-Test': 'Emotion classification in speech using MELD, detecting specific emotions such as happiness and anger.'
 }
 
 ar_datsets = {
-    'VoxCeleb-Accent-Test': 'aa'
+    'VoxCeleb-Accent-Test': 'Test dataset for accent recognition, based on VoxCeleb, a large speaker identification dataset.'
 }
 
 gr_datasets = {
-    'VoxCeleb-Gender-Test': 'aa',
-    'IEMOCAP-Gender-Test': 'bb'
+    'VoxCeleb-Gender-Test': 'Test dataset for gender classification, also derived from VoxCeleb.',
+    'IEMOCAP-Gender-Test': 'Gender classification based on the IEMOCAP dataset.'
 }
 
 spt_datasets = {
-    'Covost2-EN-ID-test': 'aa',
-    'Covost2-EN-ZH-test': 'bb',
-    'Covost2-EN-TA-test': 'cc',
-    'Covost2-ID-EN-test': 'dd',
-    'Covost2-ZH-EN-test': 'ee',
-    'Covost2-TA-EN-test': 'ff'
+    'Covost2-EN-ID-test': 'CoVoST 2 dataset for speech translation from English to Indonesian.',
+    'Covost2-EN-ZH-test': 'CoVoST 2 dataset for speech translation from English to Chinese.',
+    'Covost2-EN-TA-test': 'CoVoST 2 dataset for speech translation from English to Tamil.',
+    'Covost2-ID-EN-test': 'CoVoST 2 dataset for speech translation from Indonesian to English.',
+    'Covost2-ZH-EN-test': 'CoVoST 2 dataset for speech translation from Chinese to English.',
+    'Covost2-TA-EN-test': 'CoVoST 2 dataset for speech translation from Tamil to English.'
 }
 
 cnasr_datasets = {
-    'Aishell-ASR-ZH-Test': 'aa'
+    'Aishell-ASR-ZH-Test': 'ASR test dataset for Mandarin Chinese, based on the Aishell dataset.'
 }
 
 metrics = {
-    'wer': '11',
-    'llama3_70b_judge_binary': '22',
-    'llama3_70b_judge': '33',
-    'meteor': '44',
-    'bleu': '55'
+    'wer': 'Word Error Rate (WER), a common metric for ASR evaluation. (Lower is better.)',
+    'llama3_70b_judge_binary': 'Binary evaluation using the LLAMA3-70B model, for tasks requiring a binary outcome. (0-100, scaled from a 0-1 score.)',
+    'llama3_70b_judge': 'General evaluation using the LLAMA3-70B model, scoring subjective judgments. (0-100, scaled from a 0-5 score.)',
+    'meteor': 'METEOR, a metric for evaluating text generation, often used in translation or summarization tasks. (Sensitive to output length.)',
+    'bleu': 'BLEU (Bilingual Evaluation Understudy), another text-generation metric commonly used in machine translation. (Sensitive to output length.)',
 }
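
The substance of this change is that each placeholder value ('aa', 'bb', ...) is replaced with a human-readable description, so the leaderboard UI can surface dataset and metric blurbs directly. As a rough sketch of how these tables might be consumed downstream (the merged registry and `describe_dataset` helper below are illustrative names, not part of this commit):

```python
# Illustrative sketch only: merge the per-task description tables from
# app/content.py into a single lookup keyed by dataset name.
# DATASET_DESCRIPTIONS and describe_dataset are hypothetical names.
from app.content import (asr_datsets, sqa_datasets, si_datasets,
                         ac_datasets, asqa_datasets, er_datasets,
                         ar_datsets, gr_datasets, spt_datasets,
                         cnasr_datasets, metrics)

DATASET_DESCRIPTIONS = {}
for table in (asr_datsets, sqa_datasets, si_datasets, ac_datasets,
              asqa_datasets, er_datasets, ar_datsets, gr_datasets,
              spt_datasets, cnasr_datasets):
    DATASET_DESCRIPTIONS.update(table)

def describe_dataset(name: str) -> str:
    """Return the blurb for a dataset, with a safe fallback."""
    return DATASET_DESCRIPTIONS.get(name, 'No description available.')

print(describe_dataset('LibriSpeech-Test-Clean'))
print(metrics['wer'])
```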
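For concreteness on the 'wer' entry: Word Error Rate is the word-level edit distance between hypothesis and reference, normalized by reference length. A self-contained sketch, not code from this repository:

```python
# Illustrative only: WER = (substitutions + deletions + insertions) / reference length.
# Assumes a non-empty reference.
def wer(reference: str, hypothesis: str) -> float:
    ref, hyp = reference.split(), hypothesis.split()
    # Dynamic-programming edit distance between the two word sequences.
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # substitution
    return d[len(ref)][len(hyp)] / len(ref)

print(wer("the cat sat", "the cat sat down"))  # 1 insertion / 3 words = 0.33
```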
app/draw_diagram.py CHANGED
@@ -10,7 +10,7 @@ import pandas as pd
 # huggingface_image = Image.open('style/huggingface.jpg')
 
 # other info
-path = "./additional_info/Leaderboard-Rename.xlsx"
+path = "./AudioBench-Leaderboard/additional_info/Leaderboard-Rename.xlsx"
 info_df = pd.read_excel(path)
 
 # def nav_to(value):
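
The one-line fix points the hard-coded relative path at the AudioBench-Leaderboard checkout, but the path still depends on the working directory the app is launched from. A common alternative (an assumption, not what this commit does) is to resolve the spreadsheet relative to the script itself; the sketch below assumes additional_info/ lives one level above app/:

```python
from pathlib import Path
import pandas as pd

# Hypothetical alternative: resolve the spreadsheet relative to this file
# rather than the current working directory, so the app behaves the same
# regardless of where it is launched from. Assumes additional_info/ sits
# in the repository root, one level above app/.
path = Path(__file__).resolve().parent.parent / "additional_info" / "Leaderboard-Rename.xlsx"
info_df = pd.read_excel(path)
```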