pszemraj leaderboard-pr-bot committed on
Commit
1aef4be
1 Parent(s): fb4bcd4

Adding Evaluation Results (#4)

Browse files

- Adding Evaluation Results (eae7a8b5ea03b52b7b0fecdd998615e17cca22da)


Co-authored-by: Open LLM Leaderboard PR Bot <[email protected]>

Files changed (1) hide show
  1. README.md +107 -1
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: apache-2.0
 
3
  datasets:
4
  - teknium/openhermes
5
- base_model: BEE-spoke-data/smol_llama-220M-GQA
6
  inference:
7
  parameters:
8
  do_sample: true
@@ -124,6 +124,98 @@ model-index:
124
  source:
125
  url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
126
  name: Open LLM Leaderboard
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ---
128
 
129
 
@@ -198,3 +290,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
198
  |Winogrande (5-shot) |52.01|
199
  |GSM8k (5-shot) | 0.61|
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ base_model: BEE-spoke-data/smol_llama-220M-GQA
4
  datasets:
5
  - teknium/openhermes
 
6
  inference:
7
  parameters:
8
  do_sample: true
 
124
  source:
125
  url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
126
  name: Open LLM Leaderboard
127
+ - task:
128
+ type: text-generation
129
+ name: Text Generation
130
+ dataset:
131
+ name: IFEval (0-Shot)
132
+ type: HuggingFaceH4/ifeval
133
+ args:
134
+ num_few_shot: 0
135
+ metrics:
136
+ - type: inst_level_strict_acc and prompt_level_strict_acc
137
+ value: 15.55
138
+ name: strict accuracy
139
+ source:
140
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
141
+ name: Open LLM Leaderboard
142
+ - task:
143
+ type: text-generation
144
+ name: Text Generation
145
+ dataset:
146
+ name: BBH (3-Shot)
147
+ type: BBH
148
+ args:
149
+ num_few_shot: 3
150
+ metrics:
151
+ - type: acc_norm
152
+ value: 3.11
153
+ name: normalized accuracy
154
+ source:
155
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
156
+ name: Open LLM Leaderboard
157
+ - task:
158
+ type: text-generation
159
+ name: Text Generation
160
+ dataset:
161
+ name: MATH Lvl 5 (4-Shot)
162
+ type: hendrycks/competition_math
163
+ args:
164
+ num_few_shot: 4
165
+ metrics:
166
+ - type: exact_match
167
+ value: 0.0
168
+ name: exact match
169
+ source:
170
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
171
+ name: Open LLM Leaderboard
172
+ - task:
173
+ type: text-generation
174
+ name: Text Generation
175
+ dataset:
176
+ name: GPQA (0-shot)
177
+ type: Idavidrein/gpqa
178
+ args:
179
+ num_few_shot: 0
180
+ metrics:
181
+ - type: acc_norm
182
+ value: 2.35
183
+ name: acc_norm
184
+ source:
185
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
186
+ name: Open LLM Leaderboard
187
+ - task:
188
+ type: text-generation
189
+ name: Text Generation
190
+ dataset:
191
+ name: MuSR (0-shot)
192
+ type: TAUR-Lab/MuSR
193
+ args:
194
+ num_few_shot: 0
195
+ metrics:
196
+ - type: acc_norm
197
+ value: 6.22
198
+ name: acc_norm
199
+ source:
200
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
201
+ name: Open LLM Leaderboard
202
+ - task:
203
+ type: text-generation
204
+ name: Text Generation
205
+ dataset:
206
+ name: MMLU-PRO (5-shot)
207
+ type: TIGER-Lab/MMLU-Pro
208
+ config: main
209
+ split: test
210
+ args:
211
+ num_few_shot: 5
212
+ metrics:
213
+ - type: acc
214
+ value: 1.34
215
+ name: accuracy
216
+ source:
217
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes
218
+ name: Open LLM Leaderboard
219
  ---
220
 
221
 
 
290
  |Winogrande (5-shot) |52.01|
291
  |GSM8k (5-shot) | 0.61|
292
 
293
+
294
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
295
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_BEE-spoke-data__smol_llama-220M-openhermes)
296
+
297
+ | Metric |Value|
298
+ |-------------------|----:|
299
+ |Avg. | 4.76|
300
+ |IFEval (0-Shot) |15.55|
301
+ |BBH (3-Shot) | 3.11|
302
+ |MATH Lvl 5 (4-Shot)| 0.00|
303
+ |GPQA (0-shot) | 2.35|
304
+ |MuSR (0-shot) | 6.22|
305
+ |MMLU-PRO (5-shot) | 1.34|
306
+