zhuohan-7's picture
Upload folder using huggingface_hub
f2b616f verified
raw
history blame
11.7 kB
import streamlit as st
from app.draw_diagram import *
from app.content import *
def dataset_contents(dataset, metrics):
    """Render the dataset and metric descriptions as small italic captions.

    Args:
        dataset: Text placed after "DATASET INFORMATION:". It is interpolated
            into raw HTML and rendered with ``unsafe_allow_html=True``, so it
            is not escaped.
        metrics: Text placed after "METRIC INFORMATION:"; same treatment.
    """
    # The two disabled declarations are wrapped in real CSS comments. The
    # previous `#` prefix is not CSS comment syntax and only worked because
    # browsers discard declarations they cannot parse.
    caption_css = (
        "\n"
        "<style>\n"
        ".my-dataset-info {\n"
        "/* background-color: #F9EBEA; */\n"
        "/* padding: 10px; */\n"
        "color: #626567;\n"
        "font-style: italic;\n"
        "font-size: 8px;\n"
        "height: auto;\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(caption_css, unsafe_allow_html=True)

    # Both captions share the same markup; only the label and text differ.
    for label, text in (("DATASET", dataset), ("METRIC", metrics)):
        st.markdown(
            f'<div class="my-dataset-info">\n'
            f'<p>{label} INFORMATION: {text}</p>\n'
            f'</div>',
            unsafe_allow_html=True,
        )
def dashboard():
    """Render the AudioBench landing page.

    Top to bottom: the title with GitHub badges and the paper link, the
    overview image in a centred column, three headline metrics, and the
    citation entry for the paper.
    """
    paper_url = "https://arxiv.org/abs/2406.16020"

    # Markdown payloads are assembled line by line so their exact content
    # (including leading/trailing newlines) is explicit.
    badges_md = (
        "\n"
        "[gh]: https://github.com/AudioLLMs/AudioBench\n"
        "[![GitHub watchers](https://img.shields.io/github/watchers/AudioLLMs/AudioBench?style=social)][gh]\n"
        "[![GitHub Repo stars](https://img.shields.io/github/stars/AudioLLMs/AudioBench?style=social)][gh]\n"
    )
    citation_md = (
        "\n"
        ":round_pushpin: AudioBench Paper \n\n"
        "@article{wang2024audiobench,\n"
        "title={AudioBench: A Universal Benchmark for Audio Large Language Models},\n"
        "author={Wang, Bin and Zou, Xunlong and Lin, Geyu and Sun, Shuo and Liu, Zhuohan and Zhang, Wenyu and Liu, Zhengyuan and Aw, AiTi and Chen, Nancy F},\n"
        "journal={arXiv preprint arXiv:2406.16020},\n"
        "year={2024}\n"
        "}\n"
    )
    spacer_md = "\n"  # markdown call containing only a newline, as in the original layout

    # Header: title, badges, paper link and tagline.
    with st.container():
        st.title("AudioBench")
        st.markdown(badges_md)
        st.divider()
        st.markdown(f"#### [AudioBench]({paper_url})")
        st.markdown("##### :dizzy: A comprehensive evaluation benchmark designed for general instruction-following audiolanguage models")
        st.markdown(spacer_md)

    # Overview image and headline numbers.
    with st.container():
        # Three columns weighted 0.5 / 1 / 0.5; only the middle one is used.
        image_col = st.columns([0.5, 1, 0.5])[1]
        with image_col:
            st.image(
                "./style/audio_overview.png",
                caption="Overview of the datasets in AudioBench.",
                use_column_width=True,
            )
        st.markdown(spacer_md)

        st.markdown("###### :dart: Our Benchmark includes: ")
        metric_cols = st.columns(10)
        headline_numbers = (("Tasks", "8"), ("Datasets", "26"), ("Test Models", "5"))
        # The metrics occupy columns 1..3 of the ten-column row.
        for col, (label, value) in zip(metric_cols[1:4], headline_numbers):
            col.metric(label=label, value=value)

    st.divider()

    # Citation section.
    with st.container():
        st.markdown("##### Citations")
        st.markdown(citation_md)
def asr():
    """Render the Automatic Speech Recognition page.

    Shows a dataset selector, then passes the chosen dataset's entry from
    ``asr_datsets`` and the 'wer' entry from ``metrics`` to
    ``dataset_contents`` and calls ``draw`` for the 'ASR' task.
    """
    st.title("Automatic Speech Recognition")

    dataset_options = [
        'LibriSpeech-Test-Clean',
        'LibriSpeech-Test-Other',
        'Common-Voice-15-En-Test',
        'Peoples-Speech-Test',
        'GigaSpeech-Test',
        'Earnings21-Test',
        'Earnings22-Test',
        'Tedlium3-Test',
        'Tedlium3-Longform-Test',
        'IMDA-Part1-ASR-Test',
        'IMDA-Part2-ASR-Test',
    ]

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    # NOTE(review): `asr_datsets` (sic) is the name this file references; it is
    # defined outside this block, so the spelling is kept as-is.
    dataset_contents(asr_datsets[chosen], metrics['wer'])
    draw('su', 'ASR', chosen, 'wer')
def sqa():
    """Render the Speech Question Answering page.

    The first two datasets in the selector use the
    'llama3_70b_judge_binary' metric key; the remaining ones use
    'llama3_70b_judge'.
    """
    st.title("Speech Question Answering")

    binary_judged = ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']
    score_judged = [
        'SLUE-P2-SQA5-Test',
        'Public-SG-Speech-QA-Test',
        'Spoken-Squad-v1',
    ]
    dataset_options = binary_judged + score_judged

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    # Pick the metric key once, then render with it.
    if chosen in binary_judged:
        metric_key = 'llama3_70b_judge_binary'
    else:
        metric_key = 'llama3_70b_judge'

    dataset_contents(sqa_datasets[chosen], metrics[metric_key])
    draw('su', 'SQA', chosen, metric_key)
def si():
    """Render the Speech Instruction page.

    Shows a dataset selector, then passes the chosen dataset's entry from
    ``si_datasets`` and the 'llama3_70b_judge' entry from ``metrics`` to
    ``dataset_contents`` and calls ``draw`` for the 'SI' task.
    """
    # The title previously read "Speech Question Answering", duplicating the
    # `sqa` page; this page draws the 'SI' task.
    st.title("Speech Instruction")

    dataset_options = ['OpenHermes-Audio-Test', 'ALPACA-Audio-Test']

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    dataset_contents(si_datasets[chosen], metrics['llama3_70b_judge'])
    draw('su', 'SI', chosen, 'llama3_70b_judge')
def ac():
    """Render the Audio Captioning page.

    Shows a dataset selector and a metric selector. The metric label is
    converted to a key by lower-casing it and replacing '-' with '_'
    (e.g. 'Llama3-70b-judge' -> 'llama3_70b_judge'); that key indexes
    ``metrics`` and is passed to ``draw`` for the 'AC' task.
    """
    st.title("Audio Captioning")

    dataset_options = ['WavCaps-Test', 'AudioCaps-Test']
    metric_options = ['Llama3-70b-judge', 'Meteor']

    # Five equal-width columns: dataset selector in the first, metric
    # selector in the fourth.
    columns = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])
    with columns[0]:
        chosen = st.selectbox('Select Dataset', dataset_options)
    with columns[3]:
        metric = st.selectbox('Select Metric', metric_options)

    # Both selections are dereferenced below, so both must be present. The
    # previous `or` would index with a missing dataset or call `.lower()` on
    # a missing metric.
    if not (chosen and metric):
        return

    metric_key = metric.lower().replace('-', '_')
    dataset_contents(ac_datasets[chosen], metrics[metric_key])
    draw('asu', 'AC', chosen, metric_key)
def asqa():
    """Render the Audio Scene Question Answering page.

    Shows a dataset selector, then passes the chosen dataset's entry from
    ``asqa_datasets`` and the 'llama3_70b_judge' entry from ``metrics`` to
    ``dataset_contents`` and calls ``draw`` for the 'AQA' task.
    """
    st.title("Audio Scene Question Answering")

    dataset_options = [
        'Clotho-AQA-Test',
        'WavCaps-QA-Test',
        'AudioCaps-QA-Test',
    ]

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    dataset_contents(asqa_datasets[chosen], metrics['llama3_70b_judge'])
    draw('asu', 'AQA', chosen, 'llama3_70b_judge')
def er():
    """Render the Emotion Recognition page.

    Shows a dataset selector, then passes the chosen dataset's entry from
    ``er_datasets`` and the 'llama3_70b_judge_binary' entry from ``metrics``
    to ``dataset_contents`` and calls ``draw`` for the 'ER' task.
    """
    st.title("Emotion Recognition")

    dataset_options = [
        'IEMOCAP-Emotion-Test',
        'MELD-Sentiment-Test',
        'MELD-Emotion-Test',
    ]

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    dataset_contents(er_datasets[chosen], metrics['llama3_70b_judge_binary'])
    draw('vu', 'ER', chosen, 'llama3_70b_judge_binary')
def ar():
    """Render the Accent Recognition page.

    Shows a (single-option) dataset selector, then passes the chosen
    dataset's entry from ``ar_datsets`` and the 'llama3_70b_judge' entry from
    ``metrics`` to ``dataset_contents`` and calls ``draw`` for the 'AR' task.
    """
    st.title("Accent Recognition")

    dataset_options = ['VoxCeleb-Accent-Test']

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    # NOTE(review): `ar_datsets` (sic) is the name this file references; it is
    # defined outside this block, so the spelling is kept as-is.
    dataset_contents(ar_datsets[chosen], metrics['llama3_70b_judge'])
    draw('vu', 'AR', chosen, 'llama3_70b_judge')
def gr():
    """Render the Gender Recognition page.

    Shows a dataset selector, then passes the chosen dataset's entry from
    ``gr_datasets`` and the 'llama3_70b_judge_binary' entry from ``metrics``
    to ``dataset_contents`` and calls ``draw`` for the 'GR' task.
    """
    # The title previously read "Emotion Recognition", duplicating the `er`
    # page; this page lists gender datasets and draws the 'GR' task.
    st.title("Gender Recognition")

    dataset_options = ['VoxCeleb-Gender-Test', 'IEMOCAP-Gender-Test']

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    dataset_contents(gr_datasets[chosen], metrics['llama3_70b_judge_binary'])
    draw('vu', 'GR', chosen, 'llama3_70b_judge_binary')
def spt():
    """Render the Speech Translation page.

    Shows a dataset selector, then passes the chosen dataset's entry from
    ``spt_datasets`` and the 'bleu' entry from ``metrics`` to
    ``dataset_contents`` and calls ``draw`` for the 'ST' task.
    """
    st.title("Speech Translation")

    dataset_options = [
        'Covost2-EN-ID-test',
        'Covost2-EN-ZH-test',
        'Covost2-EN-TA-test',
        'Covost2-ID-EN-test',
        'Covost2-ZH-EN-test',
        'Covost2-TA-EN-test',
    ]

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    dataset_contents(spt_datasets[chosen], metrics['bleu'])
    draw('su', 'ST', chosen, 'bleu')
def cnasr():
    """Render the Chinese Automatic Speech Recognition page.

    Shows a (single-option) dataset selector, then passes the chosen
    dataset's entry from ``cnasr_datasets`` and the 'wer' entry from
    ``metrics`` to ``dataset_contents`` and calls ``draw`` for the 'CNASR'
    task.
    """
    st.title("Chinese Automatic Speech Recognition")

    dataset_options = ['Aishell-ASR-ZH-Test']

    # Five equal-width columns; only the first hosts a widget.
    selector_col = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])[0]
    with selector_col:
        chosen = st.selectbox('Select Dataset', dataset_options)

    if not chosen:
        return

    dataset_contents(cnasr_datasets[chosen], metrics['wer'])
    draw('su', 'CNASR', chosen, 'wer')