Spaces:
Running
Running
# Group datasets under broad semantic categories.
#
# Maps each semantic category name to the list of dataset identifiers that
# belong to it. Every identifier is a string under the "iberbench/" namespace
# (presumably a Hugging Face Hub dataset id -- TODO confirm against the loader).
# Each identifier appears in exactly one category.
semantic_categories = {
    "Sentiment and Emotion Analysis": [
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-uruguay",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-mexico",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-spain",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-costa_rica",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-peru",
        "iberbench/tass-tass-emotion_analysis-2020-spanish",
        "iberbench/iberlef-restmex-sentiment_analysis-2022-spanish-mexico",
        "iberbench/iberlef-restmex-sentiment_analysis-2021-spanish-mexico",
        "iberbench/general-bec-sentiment_analysis-2024-basque",
        "iberbench/iberlef-emoevales-emotion_analysis-2021-spanish",
    ],
    "Toxicity and Harmful Language Detection": [
        "iberbench/iberlef-detoxis-aggressiveness_detection-2021-spanish",
        "iberbench/iberlef-detoxis-toxicity_detection-2021-spanish",
        "iberbench/iberlef-detoxis-improper_language_detection-2021-spanish",
        "iberbench/iberlef-detoxis-insult_detection-2021-spanish",
        "iberbench/iberlef-detoxis-mockery_detection-2021-spanish",
        "iberbench/iberlef-emoevales-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-meoffendes-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-mex_a3t-aggressiveness_detection-2019-spanish-mexico",
        "iberbench/general-hate_check-hate_speech_detection-2024-portuguese",
    ],
    "Prejudice and Discrimination Detection": [
        "iberbench/iberlef-detests_dis-stereotype_detection-2024-spanish",
        "iberbench/iberlef-exist-sexism_detection-2021-spanish",
        "iberbench/iberlef-exist-sexism_categorization-2021-spanish",
        "iberbench/iberlef-huhu-racial_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-women_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-lgtbiq_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-fatphobia_detection-2023-spanish",
    ],
    "Machine Generated Text Detection and Attribution": [
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-spanish",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-spanish",
    ],
    "Irony and Sarcasm Detection": [
        "iberbench/iberlef-detoxis-sarcasm_detection-2021-spanish",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-cuba",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-mexico",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-spain",
    ],
    "Mental Health Detection": [
        "iberbench/iberlef-mentalriskes-eating_disorder_detection-2023-spanish",
        "iberbench/iberlef-mentalriskes-depression_detection-2023-spanish",
        "iberbench/iberlef-mentalriskes-depression_categorization-2023-spanish",
    ],
    "Stance Detection": [
        "iberbench/iberlef-vaxxstance-stance_detection-2021-basque",
        "iberbench/iberlef-vaxxstance-stance_detection-2021-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-catalan",
    ],
    "Paraphrase Detection": [
        "iberbench/iberlef-parmex-paraphrase_detection-2022-spanish-mexico",
        "iberbench/general-paws_x-paraphrase_detection-2019-spanish",
        "iberbench/general-paws_gl-paraphrase_detection-2019-galician",
        "iberbench/general-paws_pt-paraphrase_detection-2019-portuguese",
        "iberbench/general-paws_ca-paraphrase_detection-2019-catalan",
        "iberbench/general-parafraseja-paraphrase_detection-2022-catalan",
    ],
    "Author Profiling": [
        "iberbench/pan-author_profiling-gender_detection-2017-spanish",
        "iberbench/pan-author_profiling-age_detection-2015-spanish",
        "iberbench/iberlef-meoffendes-gender_detection-2021-spanish",
        "iberbench/iberlef-restmex-gender_detection-2021-spanish-mexico",
    ],
    "Fake News Detection": [
        "iberbench/iberlef-fakedes-fake_news_detection-2021-spanish",
    ],
    "Humor Detection": [
        "iberbench/iberlef-haha-humor_detection-2021-spanish",
        "iberbench/iberlef-haha-humor_detection-2019-spanish",
        "iberbench/iberlef-huhu-humor_detection-2023-spanish",
    ],
    "Question Answering": [
        "iberbench/general-eus_exams-question_answering-2024-basque",
        "iberbench/general-eus_exams-question_answering-2024-spanish",
        "iberbench/general-eus_trivia-question_answering-2024-basque",
        "iberbench/general-xstorycloze-question_answering-2024-galician",
        "iberbench/general-xstorycloze-question_answering-2024-portuguese",
        "iberbench/general-xstorycloze-question_answering-2024-catalan",
        "iberbench/general-openbook_qa-question_answering-2022-spanish",
        "iberbench/general-openbook_qa-question_answering-2022-catalan",
        "iberbench/general-arc-question_answering-2024-basque",
        "iberbench/general-arc-question_answering-2024-catalan",
    ],
    "Commonsense Reasoning": [
        "iberbench/general-xcopa-commonsense_reasoning-2024-basque",
        "iberbench/general-copa_es-commonsense_reasoning-2024-spanish",
        "iberbench/general-piqa-commonsense_reasoning-2024-basque",
        "iberbench/general-copa-commonsense_reasoning-2024-catalan",
    ],
    "Reading Comprehension": [
        "iberbench/general-eus_reading-reading_comprehension-2024-basque",
        "iberbench/general-belebele-reading_comprehension-2024-basque",
        "iberbench/general-belebele-reading_comprehension-2024-spanish",
        "iberbench/general-belebele-reading_comprehension-2024-portuguese",
        "iberbench/general-belebele-reading_comprehension-2024-catalan",
    ],
    "Topic Classification": [
        "iberbench/general-eus_trivia-topic_classification-2024-basque",
        "iberbench/general-bhtc-topic_classification-2024-basque",
        "iberbench/general-clindiagnoses-topic_classification-2024-spanish",
    ],
    "Linguistic Acceptability": [
        "iberbench/general-escola-linguistic_acceptability-2024-spanish",
        "iberbench/general-galcola-linguistic_acceptability-2024-galician",
        "iberbench/general-catcola-linguistic_acceptability-2024-catalan",
    ],
    "Textual Entailment": [
        "iberbench/general-qnli-textual_entailment-2024-basque",
        "iberbench/general-xnli-textual_entailment-2024-spanish",
        "iberbench/general-xnli-textual_entailment-2024-galician",
        "iberbench/general-xnli-textual_entailment-2024-catalan",
        "iberbench/general-teca-textual_entailment-2021-catalan",
    ],
    "Intent Classification": [
        "iberbench/general-fmtodeu-intent_classification-2024-basque",
    ],
    "Proficiency Evaluation": [
        "iberbench/general-eus_proficiency-proficiency_evaluation-2024-basque",
        "iberbench/general-teleia-proficiency_evaluation-2024-spanish",
    ],
    "Text Summarization": [
        "iberbench/general-xlsum-text_summarization-2021-spanish",
        "iberbench/general-xlsum-text_summarization-2021-portuguese",
        "iberbench/general-cabreu-text_summarization-2024-catalan",
    ],
    "Lexical Analysis": [
        "iberbench/iberlef-adobo-lexical_borrowing_chunking-2021-spanish",
    ],
    "Language Identification": [
        "iberbench/tweetlid-tweetlid-language_identification-2014-spanish",
    ],
}
# Group datasets as either "fundamental" or "professional".
# Fundamental tasks are those that evaluate the knowledge and writing capabilities of LLMs.
# Tasks like question answering on exams, reading comprehension, entailment, proficiency evaluation, linguistic acceptability, etc. fall under this category.
# Professional tasks (listed under the "Industry NLP" key) are those that have economic interest for the industry.
# Tasks like author profiling, sentiment analysis, machine-generated text detection, fake news detection, stance, discrimination, etc. fall under this category.
professional_mapping = {
    # Tasks that evaluate the knowledge and language capabilities of LLMs
    # (question answering, reading comprehension, entailment, proficiency,
    # linguistic acceptability, commonsense reasoning, ...).
    "Fundamental NLP": [
        "iberbench/iberlef-adobo-lexical_borrowing_chunking-2021-spanish",
        "iberbench/general-eus_exams-question_answering-2024-basque",
        "iberbench/general-eus_exams-question_answering-2024-spanish",
        "iberbench/general-eus_trivia-topic_classification-2024-basque",
        "iberbench/general-eus_trivia-question_answering-2024-basque",
        "iberbench/general-eus_proficiency-proficiency_evaluation-2024-basque",
        "iberbench/general-xcopa-commonsense_reasoning-2024-basque",
        "iberbench/general-eus_reading-reading_comprehension-2024-basque",
        "iberbench/general-arc-question_answering-2024-basque",
        "iberbench/general-piqa-commonsense_reasoning-2024-basque",
        "iberbench/general-belebele-reading_comprehension-2024-basque",
        "iberbench/general-qnli-textual_entailment-2024-basque",
        "iberbench/general-copa_es-commonsense_reasoning-2024-spanish",
        "iberbench/general-openbook_qa-question_answering-2022-spanish",
        "iberbench/general-teleia-proficiency_evaluation-2024-spanish",
        "iberbench/general-belebele-reading_comprehension-2024-spanish",
        "iberbench/general-escola-linguistic_acceptability-2024-spanish",
        "iberbench/general-xnli-textual_entailment-2024-spanish",
        "iberbench/general-xnli-textual_entailment-2024-galician",
        "iberbench/general-galcola-linguistic_acceptability-2024-galician",
        "iberbench/general-xstorycloze-question_answering-2024-galician",
        "iberbench/general-belebele-reading_comprehension-2024-portuguese",
        "iberbench/general-xstorycloze-question_answering-2024-portuguese",
        "iberbench/general-xstorycloze-question_answering-2024-catalan",
        "iberbench/general-xnli-textual_entailment-2024-catalan",
        "iberbench/general-arc-question_answering-2024-catalan",
        "iberbench/general-belebele-reading_comprehension-2024-catalan",
        "iberbench/general-openbook_qa-question_answering-2022-catalan",
        "iberbench/general-copa-commonsense_reasoning-2024-catalan",
        "iberbench/general-catcola-linguistic_acceptability-2024-catalan",
        "iberbench/general-teca-textual_entailment-2021-catalan",
    ],
    # Tasks with economic interest for industry (sentiment analysis, author
    # profiling, machine-generated text detection, fake news, stance,
    # discrimination, ...).
    "Industry NLP": [
        "iberbench/iberlef-restmex-sentiment_analysis-2022-spanish-mexico",
        "iberbench/iberlef-restmex-sentiment_analysis-2021-spanish-mexico",
        "iberbench/general-parafraseja-paraphrase_detection-2022-catalan",
        "iberbench/general-paws_ca-paraphrase_detection-2019-catalan",
        "iberbench/general-cabreu-text_summarization-2024-catalan",
        "iberbench/general-xlsum-text_summarization-2021-portuguese",
        "iberbench/general-paws_pt-paraphrase_detection-2019-portuguese",
        "iberbench/general-paws_gl-paraphrase_detection-2019-galician",
        "iberbench/general-paws_x-paraphrase_detection-2019-spanish",
        "iberbench/general-clindiagnoses-topic_classification-2024-spanish",
        "iberbench/general-fmtodeu-intent_classification-2024-basque",
        "iberbench/general-bhtc-topic_classification-2024-basque",
        "iberbench/general-xlsum-text_summarization-2021-spanish",
        "iberbench/general-bec-sentiment_analysis-2024-basque",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-uruguay",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-mexico",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-spain",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-costa_rica",
        "iberbench/tass-tass-sentiment_analysis-2020-spanish-peru",
        "iberbench/tass-tass-emotion_analysis-2020-spanish",
        "iberbench/iberlef-emoevales-emotion_analysis-2021-spanish",
        "iberbench/iberlef-haha-humor_detection-2021-spanish",
        "iberbench/iberlef-haha-humor_detection-2019-spanish",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-cuba",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-mexico",
        "iberbench/iberlef-irosva-irony_detection-2019-spanish-spain",
        "iberbench/tweetlid-tweetlid-language_identification-2014-spanish",
        "iberbench/iberlef-parmex-paraphrase_detection-2022-spanish-mexico",
        "iberbench/iberlef-detests_dis-stereotype_detection-2024-spanish",
        "iberbench/iberlef-detoxis-aggressiveness_detection-2021-spanish",
        "iberbench/iberlef-detoxis-toxicity_detection-2021-spanish",
        "iberbench/iberlef-detoxis-improper_language_detection-2021-spanish",
        "iberbench/iberlef-detoxis-sarcasm_detection-2021-spanish",
        "iberbench/iberlef-detoxis-insult_detection-2021-spanish",
        "iberbench/iberlef-detoxis-mockery_detection-2021-spanish",
        "iberbench/iberlef-exist-sexism_detection-2021-spanish",
        "iberbench/iberlef-emoevales-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-exist-sexism_categorization-2021-spanish",
        "iberbench/iberlef-fakedes-fake_news_detection-2021-spanish",
        "iberbench/iberlef-huhu-racial_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-women_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-lgtbiq_prejudice_detection-2023-spanish",
        "iberbench/iberlef-huhu-fatphobia_detection-2023-spanish",
        "iberbench/iberlef-huhu-humor_detection-2023-spanish",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-spanish",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-english",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-portuguese",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-basque",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-catalan",
        "iberbench/iberlef-iberautextification-machine_generated_text_attribution-2024-galician",
        "iberbench/iberlef-iberautextification-machine_generated_text_detection-2024-spanish",
        "iberbench/iberlef-mentalriskes-eating_disorder_detection-2023-spanish",
        "iberbench/iberlef-vaxxstance-stance_detection-2021-basque",
        "iberbench/iberlef-vaxxstance-stance_detection-2021-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-spanish",
        "iberbench/iberlef-mex_a3t-aggressiveness_detection-2019-spanish-mexico",
        "iberbench/iberlef-meoffendes-offensiveness_detection-2021-spanish",
        "iberbench/iberlef-meoffendes-gender_detection-2021-spanish",
        "iberbench/iberlef-mentalriskes-depression_detection-2023-spanish",
        "iberbench/ibereval-multistancecat-stance_detection-2018-catalan",
        "iberbench/iberlef-mentalriskes-depression_categorization-2023-spanish",
        "iberbench/iberlef-restmex-gender_detection-2021-spanish-mexico",
        "iberbench/pan-author_profiling-gender_detection-2017-spanish",
        "iberbench/pan-author_profiling-age_detection-2015-spanish",
        "iberbench/general-hate_check-hate_speech_detection-2024-portuguese",
    ],
}