leaderboard / src /task_mappings.py
José Ángel González
fix task mappings ,
7018bc0
raw
history blame
16.6 kB
# Group datasets under broad semantic categories
semantic_categories = {
    # Maps a human-readable category name to the dataset IDs grouped under it.
    # IDs appear to follow the pattern
    # "iberbench/<source>-<dataset>-<task>-<year>-<language>[-<variety>]".
    # Key order and list order are kept exactly as they were, in case
    # consumers rely on them (e.g. for display order) -- TODO confirm.
    "Sentiment and Emotion Analysis": [
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-uruguay",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-mexico",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-spain",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-costa_rica",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-peru",
        "iberbench/tass-tass-emotion_analysis-2020-spanish",
        "iberbench/iberlef-restmex-sentiment_analysis-2022-spanish-mexico",
        "iberbench/iberlef-restmex-sentiment_analysis-2021-spanish-mexico",
        "iberbench/general-bec-sentiment_analysis-2024-basque",
        "iberbench/iberlef-emoevales-emotion_analysis-2021-spanish",
    ],
    "Toxicity and Harmful Language Detection": [
        "iberbench/iberlef-detoxis-aggressiveness_detection-2021-spanish",
        "iberbench/iberlef-detoxis-toxicity_detection-2021-spanish",
        "iberbench/iberlef-detoxis-improper_language_detection-2021-spanish",
        "iberbench/iberlef-detoxis-insult_detection-2021-spanish",
        "iberbench/iberlef-detoxis-mockery_detection-2021-spanish",
        "iberbench/iberlef-emoevales-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-meoffendes-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-mex_a3t-aggressiveness_detection-2019-spanish-mexico",
        "iberbench/general-hate_check-hate_speech_detection-2024-portuguese",
    ],
    "Prejudice and Discrimination Detection": [
        "iberbench/iberlef-detests_dis-stereotype_detection-2024-spanish",
        "iberbench/iberlef-exist-sexism_detection-2021-spanish",
        "iberbench/iberlef-exist-sexism_categorization-2021-spanish",
        "iberbench/iberlef-huhu-racial_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-women_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-lgtbiq_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-fatphobia_detection-2023-spanish",
    ],
    "Machine Generated Text Detection and Attribution": [
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-spanish",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-spanish",
    ],
    "Irony and Sarcasm Detection": [
        "iberbench/iberlef-detoxis-sarcasm_detection-2021-spanish",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-cuba",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-mexico",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-spain",
    ],
    "Mental Health Detection": [
        "iberbench/iberlef-mentalriskes-eating_disorder_detection-2023-spanish",
        "iberbench/iberlef-mentalriskes-depression_detection-2023-spanish",
        "iberbench/iberlef-mentalriskes-depression_categorization-2023-spanish",
    ],
    "Stance Detection": [
        "iberbench/iberlef-vaxxstance-stance_detection-2021-basque",
        "iberbench/iberlef-vaxxstance-stance_detection-2021-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-catalan",
    ],
    "Paraphrase Detection": [
        "iberbench/iberlef-parmex-paraphrase_detection-2022-spanish-mexico",
        "iberbench/general-paws_x-paraphrase_detection-2019-spanish",
        "iberbench/general-paws_gl-paraphrase_detection-2019-galician",
        "iberbench/general-paws_pt-paraphrase_detection-2019-portuguese",
        "iberbench/general-paws_ca-paraphrase_detection-2019-catalan",
        "iberbench/general-parafraseja-paraphrase_detection-2022-catalan",
    ],
    "Author Profiling": [
        "iberbench/pan-author_profiling-gender_detection-2017-spanish",
        "iberbench/pan-author_profiling-age_detection-2015-spanish",
        "iberbench/iberlef-meoffendes-gender_detection-2021-spanish",
        "iberbench/iberlef-restmex-gender_detection-2021-spanish-mexico",
    ],
    "Fake News Detection": [
        "iberbench/iberlef-fakedes-fake_news_detection-2021-spanish",
    ],
    "Humor Detection": [
        "iberbench/iberlef-haha-humor_detection-2021-spanish",
        "iberbench/iberlef-haha-humor_detection-2019-spanish",
        "iberbench/iberlef-huhu-humor_detection-2023-spanish",
    ],
    "Question Answering": [
        "iberbench/general-eus_exams-question_answering-2024-basque",
        "iberbench/general-eus_exams-question_answering-2024-spanish",
        "iberbench/general-eus_trivia-question_answering-2024-basque",
        "iberbench/general-xstorycloze-question_answering-2024-galician",
        "iberbench/general-xstorycloze-question_answering-2024-portuguese",
        "iberbench/general-xstorycloze-question_answering-2024-catalan",
        "iberbench/general-openbook_qa-question_answering-2022-spanish",
        "iberbench/general-openbook_qa-question_answering-2022-catalan",
        "iberbench/general-arc-question_answering-2024-basque",
        "iberbench/general-arc-question_answering-2024-catalan",
    ],
    "Commonsense Reasoning": [
        "iberbench/general-xcopa-commonsense_reasoning-2024-basque",
        "iberbench/general-copa_es-commonsense_reasoning-2024-spanish",
        "iberbench/general-piqa-commonsense_reasoning-2024-basque",
        "iberbench/general-copa-commonsense_reasoning-2024-catalan",
    ],
    "Reading Comprehension": [
        "iberbench/general-eus_reading-reading_comprehension-2024-basque",
        "iberbench/general-belebele-reading_comprehension-2024-basque",
        "iberbench/general-belebele-reading_comprehension-2024-spanish",
        "iberbench/general-belebele-reading_comprehension-2024-portuguese",
        "iberbench/general-belebele-reading_comprehension-2024-catalan",
    ],
    "Topic Classification": [
        "iberbench/general-eus_trivia-topic_classification-2024-basque",
        "iberbench/general-bhtc-topic_classification-2024-basque",
        "iberbench/general-clindiagnoses-topic_classification-2024-spanish",
    ],
    "Linguistic Acceptability": [
        "iberbench/general-escola-linguistic_acceptability-2024-spanish",
        "iberbench/general-galcola-linguistic_acceptability-2024-galician",
        "iberbench/general-catcola-linguistic_acceptability-2024-catalan",
    ],
    "Textual Entailment": [
        "iberbench/general-qnli-textual_entailment-2024-basque",
        "iberbench/general-xnli-textual_entailment-2024-spanish",
        "iberbench/general-xnli-textual_entailment-2024-galician",
        "iberbench/general-xnli-textual_entailment-2024-catalan",
        "iberbench/general-teca-textual_entailment-2021-catalan",
    ],
    "Intent Classification": [
        "iberbench/general-fmtodeu-intent_classification-2024-basque",
    ],
    "Proficiency Evaluation": [
        "iberbench/general-eus_proficiency-proficiency_evaluation-2024-basque",
        "iberbench/general-teleia-proficiency_evaluation-2024-spanish",
    ],
    "Text Summarization": [
        "iberbench/general-xlsum-text_summarization-2021-spanish",
        "iberbench/general-xlsum-text_summarization-2021-portuguese",
        "iberbench/general-cabreu-text_summarization-2024-catalan",
    ],
    "Lexical Analysis": [
        "iberbench/iberlef-adobo-lexical_borrowing_chunking-2021-spanish",
    ],
    "Language Identification": [
        "iberbench/tweetlid-tweetlid-language_identification-2014-spanish",
    ],
}
# Group datasets into "Fundamental NLP" or "Industry NLP".
# Fundamental tasks are those that evaluate the knowledge and writing capabilities of LLMs.
# Tasks like question answering on exams, reading comprehension, entailment, proficiency evaluation, linguistic acceptability, etc. fall under this category.
# Industry (professional) tasks are those that have economic interest for the industry.
# Tasks like author profiling, sentiment analysis, machine-generated text detection, fake news detection, stance, discrimination, etc. fall under this category.
professional_mapping = {
    # Two-way split of the dataset IDs; each ID is listed under exactly one
    # of the two keys below. Key order and list order are kept exactly as
    # they were, in case consumers rely on them -- TODO confirm.
    "Fundamental NLP": [
        "iberbench/iberlef-adobo-lexical_borrowing_chunking-2021-spanish",
        "iberbench/general-eus_exams-question_answering-2024-basque",
        "iberbench/general-eus_exams-question_answering-2024-spanish",
        "iberbench/general-eus_trivia-topic_classification-2024-basque",
        "iberbench/general-eus_trivia-question_answering-2024-basque",
        "iberbench/general-eus_proficiency-proficiency_evaluation-2024-basque",
        "iberbench/general-xcopa-commonsense_reasoning-2024-basque",
        "iberbench/general-eus_reading-reading_comprehension-2024-basque",
        "iberbench/general-arc-question_answering-2024-basque",
        "iberbench/general-piqa-commonsense_reasoning-2024-basque",
        "iberbench/general-belebele-reading_comprehension-2024-basque",
        "iberbench/general-qnli-textual_entailment-2024-basque",
        "iberbench/general-copa_es-commonsense_reasoning-2024-spanish",
        "iberbench/general-openbook_qa-question_answering-2022-spanish",
        "iberbench/general-teleia-proficiency_evaluation-2024-spanish",
        "iberbench/general-belebele-reading_comprehension-2024-spanish",
        "iberbench/general-escola-linguistic_acceptability-2024-spanish",
        "iberbench/general-xnli-textual_entailment-2024-spanish",
        "iberbench/general-xnli-textual_entailment-2024-galician",
        "iberbench/general-galcola-linguistic_acceptability-2024-galician",
        "iberbench/general-xstorycloze-question_answering-2024-galician",
        "iberbench/general-belebele-reading_comprehension-2024-portuguese",
        "iberbench/general-xstorycloze-question_answering-2024-portuguese",
        "iberbench/general-xstorycloze-question_answering-2024-catalan",
        "iberbench/general-xnli-textual_entailment-2024-catalan",
        "iberbench/general-arc-question_answering-2024-catalan",
        "iberbench/general-belebele-reading_comprehension-2024-catalan",
        "iberbench/general-openbook_qa-question_answering-2022-catalan",
        "iberbench/general-copa-commonsense_reasoning-2024-catalan",
        "iberbench/general-catcola-linguistic_acceptability-2024-catalan",
        "iberbench/general-teca-textual_entailment-2021-catalan",
    ],
    "Industry NLP": [
        "iberbench/iberlef-restmex-sentiment_analysis-2022-spanish-mexico",
        "iberbench/iberlef-restmex-sentiment_analysis-2021-spanish-mexico",
        "iberbench/general-parafraseja-paraphrase_detection-2022-catalan",
        "iberbench/general-paws_ca-paraphrase_detection-2019-catalan",
        "iberbench/general-cabreu-text_summarization-2024-catalan",
        "iberbench/general-xlsum-text_summarization-2021-portuguese",
        "iberbench/general-paws_pt-paraphrase_detection-2019-portuguese",
        "iberbench/general-paws_gl-paraphrase_detection-2019-galician",
        "iberbench/general-paws_x-paraphrase_detection-2019-spanish",
        "iberbench/general-clindiagnoses-topic_classification-2024-spanish",
        "iberbench/general-fmtodeu-intent_classification-2024-basque",
        "iberbench/general-bhtc-topic_classification-2024-basque",
        "iberbench/general-xlsum-text_summarization-2021-spanish",
        "iberbench/general-bec-sentiment_analysis-2024-basque",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-uruguay",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-mexico",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-spain",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-costa_rica",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-peru",
        "iberbench/tass-tass-emotion_analysis-2020-spanish",
        "iberbench/iberlef-emoevales-emotion_analysis-2021-spanish",
        "iberbench/iberlef-haha-humor_detection-2021-spanish",
        "iberbench/iberlef-haha-humor_detection-2019-spanish",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-cuba",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-mexico",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-spain",
        "iberbench/tweetlid-tweetlid-language_identification-2014-spanish",
        "iberbench/iberlef-parmex-paraphrase_detection-2022-spanish-mexico",
        "iberbench/iberlef-detests_dis-stereotype_detection-2024-spanish",
        "iberbench/iberlef-detoxis-aggressiveness_detection-2021-spanish",
        "iberbench/iberlef-detoxis-toxicity_detection-2021-spanish",
        "iberbench/iberlef-detoxis-improper_language_detection-2021-spanish",
        "iberbench/iberlef-detoxis-sarcasm_detection-2021-spanish",
        "iberbench/iberlef-detoxis-insult_detection-2021-spanish",
        "iberbench/iberlef-detoxis-mockery_detection-2021-spanish",
        "iberbench/iberlef-exist-sexism_detection-2021-spanish",
        "iberbench/iberlef-emoevales-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-exist-sexism_categorization-2021-spanish",
        "iberbench/iberlef-fakedes-fake_news_detection-2021-spanish",
        "iberbench/iberlef-huhu-racial_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-women_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-lgtbiq_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-fatphobia_detection-2023-spanish",
        "iberbench/iberlef-huhu-humor_detection-2023-spanish",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-spanish",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-spanish",
        "iberbench/iberlef-mentalriskes-eating_disorder_detection-2023-spanish",
        "iberbench/iberlef-vaxxstance-stance_detection-2021-basque",
        "iberbench/iberlef-vaxxstance-stance_detection-2021-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-spanish",
        "iberbench/iberlef-mex_a3t-aggressiveness_detection-2019-spanish-mexico",
        "iberbench/iberlef-meoffendes-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-meoffendes-gender_detection-2021-spanish",
        "iberbench/iberlef-mentalriskes-depression_detection-2023-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-catalan",
        "iberbench/iberlef-mentalriskes-depression_categorization-2023-spanish",
        "iberbench/iberlef-restmex-gender_detection-2021-spanish-mexico",
        "iberbench/pan-author_profiling-gender_detection-2017-spanish",
        "iberbench/pan-author_profiling-age_detection-2015-spanish",
        "iberbench/general-hate_check-hate_speech_detection-2024-portuguese",
    ],
}