sileod committed on
Commit
89442a5
1 Parent(s): 25f0f18

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +99 -74
README.md CHANGED
@@ -1,18 +1,15 @@
1
  ---
2
  datasets:
3
  - nyu-mll/glue
4
- - super_glue
5
  - facebook/anli
6
  - tasksource/babi_nli
7
  - sick
8
  - snli
9
  - scitail
10
- - OpenAssistant/oasst1
11
- - universal_dependencies
12
  - hans
13
- - qbao775/PARARULE-Plus
14
  - alisawuffles/WANLI
15
- - metaeval/recast
16
  - sileod/probability_words_nli
17
  - joey234/nan-nli
18
  - pietrolesci/nli_fever
@@ -22,25 +19,17 @@ datasets:
22
  - pietrolesci/dialogue_nli
23
  - pietrolesci/mpe
24
  - pietrolesci/dnc
25
- - pietrolesci/gpt3_nli
26
  - pietrolesci/recast_white
27
  - pietrolesci/joci
28
- - martn-nguyen/contrast_nli
29
  - pietrolesci/robust_nli
30
  - pietrolesci/robust_nli_is_sd
31
  - pietrolesci/robust_nli_li_ts
32
  - pietrolesci/gen_debiased_nli
33
  - pietrolesci/add_one_rte
34
- - metaeval/imppres
35
- - pietrolesci/glue_diagnostics
36
  - hlgd
37
- - PolyAI/banking77
38
  - paws
39
- - quora
40
  - medical_questions_pairs
41
- - conll2003
42
- - nlpaueb/finer-139
43
- - Anthropic/hh-rlhf
44
  - Anthropic/model-written-evals
45
  - truthful_qa
46
  - nightingal3/fig-qa
@@ -63,7 +52,6 @@ datasets:
63
  - pkavumba/balanced-copa
64
  - 12ml/e-CARE
65
  - art
66
- - tasksource/mmlu
67
  - winogrande
68
  - codah
69
  - ai2_arc
@@ -99,7 +87,6 @@ datasets:
99
  - sms_spam
100
  - humicroedit
101
  - snips_built_in_intents
102
- - banking77
103
  - hate_speech_offensive
104
  - yahoo_answers_topics
105
  - pacovaldez/stackoverflow-questions
@@ -110,19 +97,13 @@ datasets:
110
  - allenai/scicite
111
  - liar
112
  - relbert/lexical_relation_classification
113
- - metaeval/linguisticprobing
114
  - tasksource/crowdflower
115
  - metaeval/ethics
116
  - emo
117
  - google_wellformed_query
118
  - tweets_hate_speech_detection
119
  - has_part
120
- - wnut_17
121
- - ncbi_disease
122
- - acronym_identification
123
- - jnlpba
124
- - species_800
125
- - SpeedOfMagic/ontonotes_english
126
  - blog_authorship_corpus
127
  - launch/open_question_type
128
  - health_fact
@@ -141,7 +122,6 @@ datasets:
141
  - sem_eval_2010_task_8
142
  - demo-org/auditor_review
143
  - medmcqa
144
- - aqua_rat
145
  - RuyuanWan/Dynasent_Disagreement
146
  - RuyuanWan/Politeness_Disagreement
147
  - RuyuanWan/SBIC_Disagreement
@@ -149,93 +129,87 @@ datasets:
149
  - RuyuanWan/Dilemmas_Disagreement
150
  - lucasmccabe/logiqa
151
  - wiki_qa
152
- - metaeval/cycic_classification
153
- - metaeval/cycic_multiplechoice
154
- - metaeval/sts-companion
155
- - metaeval/commonsense_qa_2.0
156
- - metaeval/lingnli
157
- - metaeval/monotonicity-entailment
158
- - metaeval/arct
159
- - metaeval/scinli
160
- - metaeval/naturallogic
161
  - onestop_qa
162
  - demelin/moral_stories
163
  - corypaik/prost
164
  - aps/dynahate
165
  - metaeval/syntactic-augmentation-nli
166
- - metaeval/autotnli
167
  - lasha-nlp/CONDAQA
168
  - openai/webgpt_comparisons
169
  - Dahoas/synthetic-instruct-gptj-pairwise
170
  - metaeval/scruples
171
  - metaeval/wouldyourather
172
- - sileod/attempto-nli
173
  - metaeval/defeasible-nli
174
- - metaeval/help-nli
175
  - metaeval/nli-veridicality-transitivity
176
- - metaeval/natural-language-satisfiability
177
- - metaeval/lonli
178
  - tasksource/dadc-limit-nli
179
  - ColumbiaNLP/FLUTE
180
- - metaeval/strategy-qa
181
  - openai/summarize_from_feedback
182
  - tasksource/folio
183
- - metaeval/tomi-nli
184
- - metaeval/avicenna
 
185
  - stanfordnlp/SHP
186
  - GBaker/MedQA-USMLE-4-options-hf
187
- - GBaker/MedQA-USMLE-4-options
188
  - sileod/wikimedqa
189
  - declare-lab/cicero
190
  - amydeng2000/CREAK
191
- - metaeval/mutual
192
  - inverse-scaling/NeQA
193
  - inverse-scaling/quote-repetition
194
  - inverse-scaling/redefine-math
195
  - tasksource/puzzte
196
- - metaeval/implicatures
197
  - race
198
- - metaeval/spartqa-yn
199
- - metaeval/spartqa-mchoice
200
- - metaeval/temporal-nli
201
- - metaeval/ScienceQA_text_only
202
- - AndyChiang/cloth
203
- - metaeval/logiqa-2.0-nli
204
- - tasksource/oasst1_dense_flat
205
- - metaeval/boolq-natural-perturbations
206
- - metaeval/path-naturalness-prediction
207
  - riddle_sense
 
 
 
 
 
 
 
 
 
 
208
  - Jiangjie/ekar_english
209
- - metaeval/implicit-hate-stg1
210
  - metaeval/chaos-mnli-ambiguity
211
  - IlyaGusev/headline_cause
212
- - metaeval/race-c
213
- - metaeval/equate
 
214
  - metaeval/ambient
215
- - AndyChiang/dgen
216
- - metaeval/clcd-english
217
  - civil_comments
218
- - metaeval/acceptability-prediction
219
- - maximedb/twentyquestions
220
- - metaeval/counterfactually-augmented-snli
221
  - tasksource/I2D2
222
- - sileod/mindgames
223
- - metaeval/counterfactually-augmented-imdb
224
- - metaeval/cnli
225
- - metaeval/reclor
226
- - tasksource/oasst1_pairwise_rlhf_reward
227
- - tasksource/zero-shot-label-nli
228
  - webis/args_me
229
  - webis/Touche23-ValueEval
230
  - tasksource/starcon
231
- - tasksource/ruletaker
232
- - lighteval/lsat_qa
233
  - tasksource/ConTRoL-nli
234
  - tasksource/tracie
235
  - tasksource/sherliic
236
  - tasksource/sen-making
237
  - tasksource/winowhy
238
- - mediabiasgroup/mbib-base
239
  - tasksource/robustLR
240
  - CLUTRR/v1
241
  - tasksource/logical-fallacy
@@ -247,15 +221,15 @@ datasets:
247
  - tasksource/TroFi
248
  - sharc_modified
249
  - tasksource/conceptrules_v2
250
- - tasksource/disrpt
251
- - conll2000
252
- - DFKI-SLT/few-nerd
253
  - tasksource/com2sense
254
  - tasksource/scone
255
  - tasksource/winodict
256
  - tasksource/fool-me-twice
257
  - tasksource/monli
258
  - tasksource/corr2cause
 
259
  - tasksource/apt
260
  - zeroshot/twitter-financial-news-sentiment
261
  - tasksource/icl-symbol-tuning-instruct
@@ -263,8 +237,59 @@ datasets:
263
  - sihaochen/propsegment
264
  - HannahRoseKirk/HatemojiBuild
265
  - tasksource/regset
266
- - tasksource/babi_nli
267
  - lmsys/chatbot_arena_conversations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  language: en
269
  library_name: transformers
270
  license: apache-2.0
 
1
  ---
2
  datasets:
3
  - nyu-mll/glue
4
+ - aps/super_glue
5
  - facebook/anli
6
  - tasksource/babi_nli
7
  - sick
8
  - snli
9
  - scitail
 
 
10
  - hans
 
11
  - alisawuffles/WANLI
12
+ - tasksource/recast
13
  - sileod/probability_words_nli
14
  - joey234/nan-nli
15
  - pietrolesci/nli_fever
 
19
  - pietrolesci/dialogue_nli
20
  - pietrolesci/mpe
21
  - pietrolesci/dnc
 
22
  - pietrolesci/recast_white
23
  - pietrolesci/joci
 
24
  - pietrolesci/robust_nli
25
  - pietrolesci/robust_nli_is_sd
26
  - pietrolesci/robust_nli_li_ts
27
  - pietrolesci/gen_debiased_nli
28
  - pietrolesci/add_one_rte
29
+ - tasksource/imppres
 
30
  - hlgd
 
31
  - paws
 
32
  - medical_questions_pairs
 
 
 
33
  - Anthropic/model-written-evals
34
  - truthful_qa
35
  - nightingal3/fig-qa
 
52
  - pkavumba/balanced-copa
53
  - 12ml/e-CARE
54
  - art
 
55
  - winogrande
56
  - codah
57
  - ai2_arc
 
87
  - sms_spam
88
  - humicroedit
89
  - snips_built_in_intents
 
90
  - hate_speech_offensive
91
  - yahoo_answers_topics
92
  - pacovaldez/stackoverflow-questions
 
97
  - allenai/scicite
98
  - liar
99
  - relbert/lexical_relation_classification
100
+ - tasksource/linguisticprobing
101
  - tasksource/crowdflower
102
  - metaeval/ethics
103
  - emo
104
  - google_wellformed_query
105
  - tweets_hate_speech_detection
106
  - has_part
 
 
 
 
 
 
107
  - blog_authorship_corpus
108
  - launch/open_question_type
109
  - health_fact
 
122
  - sem_eval_2010_task_8
123
  - demo-org/auditor_review
124
  - medmcqa
 
125
  - RuyuanWan/Dynasent_Disagreement
126
  - RuyuanWan/Politeness_Disagreement
127
  - RuyuanWan/SBIC_Disagreement
 
129
  - RuyuanWan/Dilemmas_Disagreement
130
  - lucasmccabe/logiqa
131
  - wiki_qa
132
+ - tasksource/cycic_classification
133
+ - tasksource/cycic_multiplechoice
134
+ - tasksource/sts-companion
135
+ - tasksource/commonsense_qa_2.0
136
+ - tasksource/lingnli
137
+ - tasksource/monotonicity-entailment
138
+ - tasksource/arct
139
+ - tasksource/scinli
140
+ - tasksource/naturallogic
141
  - onestop_qa
142
  - demelin/moral_stories
143
  - corypaik/prost
144
  - aps/dynahate
145
  - metaeval/syntactic-augmentation-nli
146
+ - tasksource/autotnli
147
  - lasha-nlp/CONDAQA
148
  - openai/webgpt_comparisons
149
  - Dahoas/synthetic-instruct-gptj-pairwise
150
  - metaeval/scruples
151
  - metaeval/wouldyourather
 
152
  - metaeval/defeasible-nli
153
+ - tasksource/help-nli
154
  - metaeval/nli-veridicality-transitivity
155
+ - tasksource/lonli
 
156
  - tasksource/dadc-limit-nli
157
  - ColumbiaNLP/FLUTE
158
+ - tasksource/strategy-qa
159
  - openai/summarize_from_feedback
160
  - tasksource/folio
161
+ - yale-nlp/FOLIO
162
+ - tasksource/tomi-nli
163
+ - tasksource/avicenna
164
  - stanfordnlp/SHP
165
  - GBaker/MedQA-USMLE-4-options-hf
 
166
  - sileod/wikimedqa
167
  - declare-lab/cicero
168
  - amydeng2000/CREAK
169
+ - tasksource/mutual
170
  - inverse-scaling/NeQA
171
  - inverse-scaling/quote-repetition
172
  - inverse-scaling/redefine-math
173
  - tasksource/puzzte
174
+ - tasksource/implicatures
175
  - race
176
+ - tasksource/race-c
177
+ - tasksource/spartqa-yn
178
+ - tasksource/spartqa-mchoice
179
+ - tasksource/temporal-nli
 
 
 
 
 
180
  - riddle_sense
181
+ - tasksource/clcd-english
182
+ - maximedb/twentyquestions
183
+ - metaeval/reclor
184
+ - tasksource/counterfactually-augmented-imdb
185
+ - tasksource/counterfactually-augmented-snli
186
+ - metaeval/cnli
187
+ - tasksource/boolq-natural-perturbations
188
+ - metaeval/acceptability-prediction
189
+ - metaeval/equate
190
+ - tasksource/ScienceQA_text_only
191
  - Jiangjie/ekar_english
192
+ - tasksource/implicit-hate-stg1
193
  - metaeval/chaos-mnli-ambiguity
194
  - IlyaGusev/headline_cause
195
+ - tasksource/logiqa-2.0-nli
196
+ - tasksource/oasst2_dense_flat
197
+ - sileod/mindgames
198
  - metaeval/ambient
199
+ - metaeval/path-naturalness-prediction
 
200
  - civil_comments
201
+ - AndyChiang/cloth
202
+ - AndyChiang/dgen
 
203
  - tasksource/I2D2
 
 
 
 
 
 
204
  - webis/args_me
205
  - webis/Touche23-ValueEval
206
  - tasksource/starcon
207
+ - PolyAI/banking77
 
208
  - tasksource/ConTRoL-nli
209
  - tasksource/tracie
210
  - tasksource/sherliic
211
  - tasksource/sen-making
212
  - tasksource/winowhy
 
213
  - tasksource/robustLR
214
  - CLUTRR/v1
215
  - tasksource/logical-fallacy
 
221
  - tasksource/TroFi
222
  - sharc_modified
223
  - tasksource/conceptrules_v2
224
+ - metaeval/disrpt
225
+ - tasksource/zero-shot-label-nli
 
226
  - tasksource/com2sense
227
  - tasksource/scone
228
  - tasksource/winodict
229
  - tasksource/fool-me-twice
230
  - tasksource/monli
231
  - tasksource/corr2cause
232
+ - lighteval/lsat_qa
233
  - tasksource/apt
234
  - zeroshot/twitter-financial-news-sentiment
235
  - tasksource/icl-symbol-tuning-instruct
 
237
  - sihaochen/propsegment
238
  - HannahRoseKirk/HatemojiBuild
239
  - tasksource/regset
240
+ - tasksource/esci
241
  - lmsys/chatbot_arena_conversations
242
+ - neurae/dnd_style_intents
243
+ - hitachi-nlp/FLD.v2
244
+ - tasksource/SDOH-NLI
245
+ - allenai/scifact_entailment
246
+ - tasksource/feasibilityQA
247
+ - tasksource/simple_pair
248
+ - tasksource/AdjectiveScaleProbe-nli
249
+ - tasksource/resnli
250
+ - tasksource/SpaRTUN
251
+ - tasksource/ReSQ
252
+ - tasksource/semantic_fragments_nli
253
+ - MoritzLaurer/dataset_train_nli
254
+ - tasksource/stepgame
255
+ - tasksource/nlgraph
256
+ - tasksource/oasst2_pairwise_rlhf_reward
257
+ - tasksource/hh-rlhf
258
+ - tasksource/ruletaker
259
+ - qbao775/PARARULE-Plus
260
+ - tasksource/proofwriter
261
+ - tasksource/logical-entailment
262
+ - tasksource/nope
263
+ - tasksource/LogicNLI
264
+ - kiddothe2b/contract-nli
265
+ - AshtonIsNotHere/nli4ct_semeval2024
266
+ - tasksource/lsat-ar
267
+ - tasksource/lsat-rc
268
+ - AshtonIsNotHere/biosift-nli
269
+ - tasksource/brainteasers
270
+ - Anthropic/persuasion
271
+ - erbacher/AmbigNQ-clarifying-question
272
+ - tasksource/SIGA-nli
273
+ - unigram/FOL-nli
274
+ - tasksource/goal-step-wikihow
275
+ - GGLab/PARADISE
276
+ - tasksource/doc-nli
277
+ - tasksource/mctest-nli
278
+ - tasksource/patent-phrase-similarity
279
+ - tasksource/natural-language-satisfiability
280
+ - tasksource/idioms-nli
281
+ - tasksource/lifecycle-entailment
282
+ - nvidia/HelpSteer
283
+ - nvidia/HelpSteer2
284
+ - sadat2307/MSciNLI
285
+ - pushpdeep/UltraFeedback-paired
286
+ - tasksource/AES2-essay-scoring
287
+ - tasksource/english-grading
288
+ - tasksource/wice
289
+ - Dzeniks/hover
290
+ - sileod/missing-item-prediction
291
+ - tasksource/tasksource_dpo_pairs
292
+
293
  language: en
294
  library_name: transformers
295
  license: apache-2.0