sequelbox committed on
Commit
4d54ca8
1 Parent(s): bef139c
Files changed (1) hide show
  1. README.md +92 -0
README.md CHANGED
@@ -179,6 +179,98 @@ model-index:
179
  - type: acc
180
  value: 90.00
181
  name: acc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  ---
184
  # PlumChat 70b
 
179
  - type: acc
180
  value: 90.00
181
  name: acc
182
+ - task:
183
+ type: text-generation
184
+ name: Text Generation
185
+ dataset:
186
+ name: IFEval (0-Shot)
187
+ type: HuggingFaceH4/ifeval
188
+ args:
189
+ num_few_shot: 0
190
+ metrics:
191
+ - type: inst_level_strict_acc and prompt_level_strict_acc
192
+ value: 56.16
193
+ name: strict accuracy
194
+ source:
195
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
196
+ name: Open LLM Leaderboard
197
+ - task:
198
+ type: text-generation
199
+ name: Text Generation
200
+ dataset:
201
+ name: BBH (3-Shot)
202
+ type: BBH
203
+ args:
204
+ num_few_shot: 3
205
+ metrics:
206
+ - type: acc_norm
207
+ value: 52.81
208
+ name: normalized accuracy
209
+ source:
210
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
211
+ name: Open LLM Leaderboard
212
+ - task:
213
+ type: text-generation
214
+ name: Text Generation
215
+ dataset:
216
+ name: MATH Lvl 5 (4-Shot)
217
+ type: hendrycks/competition_math
218
+ args:
219
+ num_few_shot: 4
220
+ metrics:
221
+ - type: exact_match
222
+ value: 29.98
223
+ name: exact match
224
+ source:
225
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
226
+ name: Open LLM Leaderboard
227
+ - task:
228
+ type: text-generation
229
+ name: Text Generation
230
+ dataset:
231
+ name: GPQA (0-shot)
232
+ type: Idavidrein/gpqa
233
+ args:
234
+ num_few_shot: 0
235
+ metrics:
236
+ - type: acc_norm
237
+ value: 18.79
238
+ name: acc_norm
239
+ source:
240
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
241
+ name: Open LLM Leaderboard
242
+ - task:
243
+ type: text-generation
244
+ name: Text Generation
245
+ dataset:
246
+ name: MuSR (0-shot)
247
+ type: TAUR-Lab/MuSR
248
+ args:
249
+ num_few_shot: 0
250
+ metrics:
251
+ - type: acc_norm
252
+ value: 20.14
253
+ name: acc_norm
254
+ source:
255
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
256
+ name: Open LLM Leaderboard
257
+ - task:
258
+ type: text-generation
259
+ name: Text Generation
260
+ dataset:
261
+ name: MMLU-PRO (5-shot)
262
+ type: TIGER-Lab/MMLU-Pro
263
+ config: main
264
+ split: test
265
+ args:
266
+ num_few_shot: 5
267
+ metrics:
268
+ - type: acc
269
+ value: 46.26
270
+ name: accuracy
271
+ source:
272
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
273
+ name: Open LLM Leaderboard
274
 
275
  ---
276
  # PlumChat 70b