lucyknada Delta-Vector committed on
Commit
e412f30
1 Parent(s): 8033195

Update README.md (#3)

Browse files

- Update README.md (332267404693c7d034502da0e6280c0d154d2de9)


Co-authored-by: DV <[email protected]>

Files changed (1) hide show
  1. README.md +20 -8
README.md CHANGED
@@ -23,7 +23,8 @@ model-index:
23
  value: 56.29
24
  name: strict accuracy
25
  source:
26
- url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
 
27
  name: Open LLM Leaderboard
28
  - task:
29
  type: text-generation
@@ -38,7 +39,8 @@ model-index:
38
  value: 35.55
39
  name: normalized accuracy
40
  source:
41
- url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
 
42
  name: Open LLM Leaderboard
43
  - task:
44
  type: text-generation
@@ -53,7 +55,8 @@ model-index:
53
  value: 17.6
54
  name: exact match
55
  source:
56
- url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
 
57
  name: Open LLM Leaderboard
58
  - task:
59
  type: text-generation
@@ -68,7 +71,8 @@ model-index:
68
  value: 10.4
69
  name: acc_norm
70
  source:
71
- url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
 
72
  name: Open LLM Leaderboard
73
  - task:
74
  type: text-generation
@@ -83,7 +87,8 @@ model-index:
83
  value: 13.43
84
  name: acc_norm
85
  source:
86
- url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
 
87
  name: Open LLM Leaderboard
88
  - task:
89
  type: text-generation
@@ -100,8 +105,16 @@ model-index:
100
  value: 31.44
101
  name: accuracy
102
  source:
103
- url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
 
104
  name: Open LLM Leaderboard
 
 
 
 
 
 
 
105
  ---
106
 
107
 
@@ -271,5 +284,4 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
271
  |MATH Lvl 5 (4-Shot)|17.60|
272
  |GPQA (0-shot) |10.40|
273
  |MuSR (0-shot) |13.43|
274
- |MMLU-PRO (5-shot) |31.44|
275
-
 
23
  value: 56.29
24
  name: strict accuracy
25
  source:
26
+ url: >-
27
+ https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
28
  name: Open LLM Leaderboard
29
  - task:
30
  type: text-generation
 
39
  value: 35.55
40
  name: normalized accuracy
41
  source:
42
+ url: >-
43
+ https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
44
  name: Open LLM Leaderboard
45
  - task:
46
  type: text-generation
 
55
  value: 17.6
56
  name: exact match
57
  source:
58
+ url: >-
59
+ https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
60
  name: Open LLM Leaderboard
61
  - task:
62
  type: text-generation
 
71
  value: 10.4
72
  name: acc_norm
73
  source:
74
+ url: >-
75
+ https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
76
  name: Open LLM Leaderboard
77
  - task:
78
  type: text-generation
 
87
  value: 13.43
88
  name: acc_norm
89
  source:
90
+ url: >-
91
+ https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
92
  name: Open LLM Leaderboard
93
  - task:
94
  type: text-generation
 
105
  value: 31.44
106
  name: accuracy
107
  source:
108
+ url: >-
109
+ https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=anthracite-org/magnum-v4-22b
110
  name: Open LLM Leaderboard
111
+ datasets:
112
+ - anthracite-org/c2_logs_32k_mistral-v3_v1.2_no_system
113
+ - anthracite-org/kalo-opus-instruct-22k-no-refusal-no-system
114
+ - anthracite-org/kalo-opus-instruct-3k-filtered-no-system
115
+ - anthracite-org/nopm_claude_writing_fixed
116
+ - anthracite-org/kalo_opus_misc_240827_no_system
117
+ - anthracite-org/kalo_misc_part2_no_system
118
  ---
119
 
120
 
 
284
  |MATH Lvl 5 (4-Shot)|17.60|
285
  |GPQA (0-shot) |10.40|
286
  |MuSR (0-shot) |13.43|
287
+ |MMLU-PRO (5-shot) |31.44|