CreitinGameplays committed on
Commit
5413b3e
1 Parent(s): 7d549b5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -92
README.md CHANGED
@@ -91,95 +91,3 @@ Using this format when interacting with the model can improve its performance an
91
 
92
  ------
93
 
94
- @misc{open-llm-leaderboard,
95
- author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
96
- title = {Open LLM Leaderboard},
97
- year = {2023},
98
- publisher = {Hugging Face},
99
- howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
100
- }
101
- @software{eval-harness,
102
- author = {Gao, Leo and
103
- Tow, Jonathan and
104
- Biderman, Stella and
105
- Black, Sid and
106
- DiPofi, Anthony and
107
- Foster, Charles and
108
- Golding, Laurence and
109
- Hsu, Jeffrey and
110
- McDonell, Kyle and
111
- Muennighoff, Niklas and
112
- Phang, Jason and
113
- Reynolds, Laria and
114
- Tang, Eric and
115
- Thite, Anish and
116
- Wang, Ben and
117
- Wang, Kevin and
118
- Zou, Andy},
119
- title = {A framework for few-shot language model evaluation},
120
- month = sep,
121
- year = 2021,
122
- publisher = {Zenodo},
123
- version = {v0.0.1},
124
- doi = {10.5281/zenodo.5371628},
125
- url = {https://doi.org/10.5281/zenodo.5371628}
126
- }
127
- @misc{clark2018think,
128
- title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
129
- author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
130
- year={2018},
131
- eprint={1803.05457},
132
- archivePrefix={arXiv},
133
- primaryClass={cs.AI}
134
- }
135
- @misc{zellers2019hellaswag,
136
- title={HellaSwag: Can a Machine Really Finish Your Sentence?},
137
- author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
138
- year={2019},
139
- eprint={1905.07830},
140
- archivePrefix={arXiv},
141
- primaryClass={cs.CL}
142
- }
143
- @misc{hendrycks2021measuring,
144
- title={Measuring Massive Multitask Language Understanding},
145
- author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
146
- year={2021},
147
- eprint={2009.03300},
148
- archivePrefix={arXiv},
149
- primaryClass={cs.CY}
150
- }
151
- @misc{lin2022truthfulqa,
152
- title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
153
- author={Stephanie Lin and Jacob Hilton and Owain Evans},
154
- year={2022},
155
- eprint={2109.07958},
156
- archivePrefix={arXiv},
157
- primaryClass={cs.CL}
158
- }
159
- @misc{DBLP:journals/corr/abs-1907-10641,
160
- title={{WINOGRANDE:} An Adversarial Winograd Schema Challenge at Scale},
161
- author={Keisuke Sakaguchi and Ronan Le Bras and Chandra Bhagavatula and Yejin Choi},
162
- year={2019},
163
- eprint={1907.10641},
164
- archivePrefix={arXiv},
165
- primaryClass={cs.CL}
166
- }
167
- @misc{DBLP:journals/corr/abs-2110-14168,
168
- title={Training Verifiers to Solve Math Word Problems},
169
- author={Karl Cobbe and
170
- Vineet Kosaraju and
171
- Mohammad Bavarian and
172
- Mark Chen and
173
- Heewoo Jun and
174
- Lukasz Kaiser and
175
- Matthias Plappert and
176
- Jerry Tworek and
177
- Jacob Hilton and
178
- Reiichiro Nakano and
179
- Christopher Hesse and
180
- John Schulman},
181
- year={2021},
182
- eprint={2110.14168},
183
- archivePrefix={arXiv},
184
- primaryClass={cs.CL}
185
- }
 
91
 
92
  ------
93