binwang committed on
Commit
fa6ba7b
·
verified ·
1 Parent(s): 81a8ab0

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app/content.py +7 -0
  2. app/pages.py +30 -1
  3. app/summarization.py +1 -1
app/content.py CHANGED
@@ -68,6 +68,10 @@ cnasr_datasets = {
68
  'Aishell-ASR-ZH-Test': 'ASR test dataset for Mandarin Chinese, based on the Aishell dataset.'
69
  }
70
 
 
 
 
 
71
  metrics = {
72
  'wer': 'Word Error Rate (WER), a common metric for ASR evaluation. (The lower, the better)',
73
  'llama3_70b_judge_binary': 'Binary evaluation using the LLAMA3-70B model, for tasks requiring a binary outcome. (0-100 based on score 0-1)',
@@ -84,6 +88,7 @@ metrics_info = {
84
  'bleu': 'BLEU Score. The higher, the better.',
85
  }
86
 
 
87
  dataname_column_rename_in_table = {
88
  'librispeech_test_clean' : 'LibriSpeech-Clean',
89
  'librispeech_test_other' : 'LibriSpeech-Other',
@@ -126,5 +131,7 @@ dataname_column_rename_in_table = {
126
  'imda_part5_30s_asr_test' : 'IMDA-Part5-30s-ASR',
127
  'imda_part6_30s_asr_test' : 'IMDA-Part6-30s-ASR',
128
 
 
 
129
 
130
  }
 
68
  'Aishell-ASR-ZH-Test': 'ASR test dataset for Mandarin Chinese, based on the Aishell dataset.'
69
  }
70
 
71
+ MUSIC_MCQ_DATASETS = {
72
+ 'MuChoMusic-Test': 'Test dataset for music understanding, from paper: MuChoMusic: Evaluating Music Understanding in Multimodal Audio-Language Models.'
73
+ }
74
+
75
  metrics = {
76
  'wer': 'Word Error Rate (WER), a common metric for ASR evaluation. (The lower, the better)',
77
  'llama3_70b_judge_binary': 'Binary evaluation using the LLAMA3-70B model, for tasks requiring a binary outcome. (0-100 based on score 0-1)',
 
88
  'bleu': 'BLEU Score. The higher, the better.',
89
  }
90
 
91
+
92
  dataname_column_rename_in_table = {
93
  'librispeech_test_clean' : 'LibriSpeech-Clean',
94
  'librispeech_test_other' : 'LibriSpeech-Other',
 
131
  'imda_part5_30s_asr_test' : 'IMDA-Part5-30s-ASR',
132
  'imda_part6_30s_asr_test' : 'IMDA-Part6-30s-ASR',
133
 
134
+ 'muchomusic_test' : 'MuChoMusic'
135
+
136
 
137
  }
app/pages.py CHANGED
@@ -373,8 +373,37 @@ def spt():
373
 
374
  if filter_1:
375
  if filter_1 in sum:
376
- sum_table_mulit_metrix('ST', ['bleu'])
377
  else:
378
  dataset_contents(spt_datasets[filter_1], metrics['bleu'])
379
  draw('su', 'ST', filter_1, 'bleu')
380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
 
374
  if filter_1:
375
  if filter_1 in sum:
376
+ sum_table_mulit_metrix('st', ['bleu'])
377
  else:
378
  dataset_contents(spt_datasets[filter_1], metrics['bleu'])
379
  draw('su', 'ST', filter_1, 'bleu')
380
 
381
+
382
+ def music_mcq():
383
+ st.title("Task: Music Understanding - MCQ Questions")
384
+
385
+ sum = ['Overall']
386
+
387
+ dataset_lists = ['MuChoMusic-Test',
388
+ ]
389
+
390
+ filters_levelone = sum + dataset_lists
391
+
392
+ left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
393
+
394
+ with left:
395
+ filter_1 = st.selectbox('Dataset', filters_levelone)
396
+
397
+ if filter_1:
398
+ if filter_1 in sum:
399
+ sum_table_mulit_metrix('music_mcq', ['llama3_70b_judge_binary'])
400
+ else:
401
+ dataset_contents(MUSIC_MCQ_DATASETS[filter_1], metrics['llama3_70b_judge_binary'])
402
+ draw('vu', 'music_mcq', filter_1, 'llama3_70b_judge_binary')
403
+
404
+
405
+
406
+
407
+
408
+
409
+
app/summarization.py CHANGED
@@ -21,7 +21,7 @@ def sum_table_mulit_metrix(task_name, metrics_lists: List[str]):
21
  # combine chart data from multiple sources
22
  chart_data = pd.DataFrame()
23
  for metrics in metrics_lists:
24
- folder = f"./results/{metrics}/"
25
  data_path = f'{folder}/{task_name.lower()}.csv'
26
  one_chart_data = pd.read_csv(data_path).round(3)
27
  if len(chart_data) == 0:
 
21
  # combine chart data from multiple sources
22
  chart_data = pd.DataFrame()
23
  for metrics in metrics_lists:
24
+ folder = f"./results/{metrics}"
25
  data_path = f'{folder}/{task_name.lower()}.csv'
26
  one_chart_data = pd.read_csv(data_path).round(3)
27
  if len(chart_data) == 0: