pl-asr-leaderboard / constants.py
mj-new
updated leaderboard - added poleval test sets
32c749f
# About text for the leaderboard: what it covers, the supported datasets, how
# to contribute, a pointer to the blog post, and a BibTeX citation.
# The value is one long line with no newline characters; the "<br>" tags are
# the only line-break markers it contains.
# NOTE(review): the contact address reads "[email protected]", which looks like a
# redaction placeholder rather than a real address - confirm before release.
ABOUT_INFO = (
    "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>"
    "The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>"
    "If you want to add your system or dataset to the leaderboard, please contact Michał Junczyk ([email protected]) or open a pull request on [GitHub](https://github.com/goodmike31/pl-asr-bigos-tools) <br>"
    "To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard).<br> "
    "If you use this work, please cite it as follows: <br> "
    # BibTeX entry wrapped in triple backticks. "\\url" produces a literal
    # \url command; a bare "\u" in a normal string literal would be parsed as
    # the start of a unicode escape, so the backslash must be doubled.
    "```@misc{amu_cai_pl_asr_leaderboard, "
    "author = {Michał Junczyk}, "
    "title = {{AMU Polish ASR Leaderboard}}, "
    "year = {2024}, "
    "howpublished = {\\url{https://huggingface.co/spaces/amu-cai/pl-asr-leaderboard}}, "
    "publisher = {Hugging Face} "
    "}```"
)
# Short Markdown descriptions of the evaluation data sources. Each value is a
# single line of text (no embedded newlines) assembled from adjacent literals.

# BIGOS: what the acronym stands for, who curates it, and a link to the dataset.
BIGOS_INFO = (
    "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of "
    "freely available speech datasets curated by the "
    "[AMU-CAI team](https://huggingface.co/amu-cai). "
    "Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"
)

# PELCRA for BIGOS: origin of the corpora, who curated them, and a dataset link.
PELCRA_INFO = (
    "PELCRA for BIGOS is the subset of speech corpora created by the "
    "[PELCRA group](http://pelcra.pl/new/), curated for the BIGOS benchmark by the "
    "[AMU-CAI team](https://huggingface.co/amu-cai). "
    "Learn more [here](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos)"
)

# PolEval: describes the challenge test data and links to the task page.
POLEVAL_INFO = (
    "PolEval is test used for Polish ASR challenge. "
    "It consists of recordings from BIGOS and PELCRA datasets. "
    "For details see: [PolEval 2024 - Task 3 - ASR](https://poleval.pl/tasks/task3)"
)
# Introductory text listing the factors by which ASR accuracy is broken down.
ANALYSIS_INFO = "Here we examine ASR accuracy depending on the system type, model size, audio duration, speaking rate and speaker characteristics (age and gender)"
# Introductory text about inspecting system output on individual audio samples.
INSPECTION_INFO = (
    "Here you can inspect the performance of specific ASR systems "
    "on the specific audio samples"
)

# Introductory text about comparing systems per dataset with selectable
# metrics and visualizations.
COMPARISON_INFO = (
    "Here you can compare the performance of different ASR systems "
    "on the specific datasets using metrics and visualizations of your choice."
)
# Maps an ASR system identifier to the hex color associated with it.
# Extend this mapping (or override entries) to cover further systems.
asr_systems_colors_mapping = dict(
    azure="#1f77b4",          # blue
    google="#2ca02c",         # green
    wav2vec2="#d62728",       # red
    nemo="#9467bd",           # purple
    assemblyai="#8c564b",     # brown
    mms="#e377c2",            # pink
    google_v2="#7f7f7f",      # gray
    whisper_cloud="#bcbd22",  # olive
    whisper_local="#ff7f0e",  # orange
)