booydar committed on
Commit
3aac311
·
verified ·
1 Parent(s): 4bec130

Fill in missing result scores

Browse files
Files changed (1) hide show
  1. data/leaderboard-v0_results.csv +7 -7
data/leaderboard-v0_results.csv CHANGED
@@ -25,10 +25,10 @@ Meta-Llama-3.1-8B-Instruct,avg,67,68,66,66,62,60,56,49,39,,,
25
  Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
26
  GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
27
  Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
28
- ~ Mamba (130M) fine-tune,avg,,,,98.7,98.5,98.5,98.1,97,92.5,,,
29
  Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
30
  ~ RMT (137M) fine-tune,avg,99.36,97.4,94.66,92.32,89.9,85.62,77.88,69.86,58.52,46.36,42.84,33.78
31
- ~ ARMT (137M) fine-tune,avg,99.32,,,98.1,98.2,98.1,98,97.9,96.9,95.3,93.4,76.6
32
  GPT-2 (137M),qa1,35,13,,,,,,,,,,
33
  mamba-2.8b-hf,qa1,65,56,40,7,1,,,,,,,
34
  rwkv-6-world-7b,qa1,65,62,53,39,5,,,,,,,
@@ -56,7 +56,7 @@ GPT-4,qa1,99,100,100,95,93,84,62,58,39,,,
56
  ~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
57
  Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
58
  ~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
59
- ~ ARMT (137M) fine-tune,qa1,100,,,100,100,100,100,100,100,99,99,89
60
  GPT-2 (137M),qa2,21,17,,,,,,,,,,
61
  mamba-2.8b-hf,qa2,68,47,24,8,0,,,,,,,
62
  rwkv-6-world-7b,qa2,42,26,20,16,3,,,,,,,
@@ -84,7 +84,7 @@ GPT-4,qa2,88,79,72,68,65,59,42,25,25,,,
84
  ~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
85
  Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
86
  ~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
87
- ~ ARMT (137M) fine-tune,qa2,100,,,100,100,100,100,100,100,99,99,84
88
  GPT-2 (137M),qa3,6,8,,,,,,,,,,
89
  mamba-2.8b-hf,qa3,48,39,21,8,0,,,,,,,
90
  rwkv-6-world-7b,qa3,40,45,28,24,4,,,,,,,
@@ -112,7 +112,7 @@ GPT-4,qa3,56,63,57,56,53,45,31,31,32,,,
112
  ~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
113
  Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
114
  ~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
115
- ~ ARMT (137M) fine-tune,qa3,97,,,92,92,92,91,90,86,80,72,37
116
  GPT-2 (137M),qa4,29,18,,,,,,,,,,
117
  mamba-2.8b-hf,qa4,96,59,47,12,0,,,,,,,
118
  rwkv-6-world-7b,qa4,54,65,57,35,7,,,,,,,
@@ -140,7 +140,7 @@ GPT-4,qa4,98,70,63,60,52,47,46,40,32,,,
140
  ~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
141
  Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
142
  ~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
143
- ~ ARMT (137M) fine-tune,qa4,100,,,100,100,100,100,100,100,100,100,92
144
  GPT-2 (137M),qa5,45,19,,,,,,,,,,
145
  mamba-2.8b-hf,qa5,75,58,43,9,0,,,,,,,
146
  rwkv-6-world-7b,qa5,79,77,80,61,14,,,,,,,
@@ -168,4 +168,4 @@ GPT-4,qa5,96,95,92,90,93,85,82,60,51,,,
168
  ~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
169
  Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
170
  ~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44
171
- ~ ARMT (137M) fine-tune,qa5,99.6,,,98.1,98.2,98.1,98,97.9,96.9,95.3,93.4,76.6
 
25
  Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
26
  GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
27
  Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
28
+ ~ Mamba (130M) fine-tune,avg,98.46,98.6,98.7,98.7,98.5,98.5,98.1,97,92.5,,,
29
  Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
30
  ~ RMT (137M) fine-tune,avg,99.36,97.4,94.66,92.32,89.9,85.62,77.88,69.86,58.52,46.36,42.84,33.78
31
+ ~ ARMT (137M) fine-tune,avg,99.32,95.9,97.6,98.1,98.2,98.1,98,97.9,96.9,95.3,93.4,76.6
32
  GPT-2 (137M),qa1,35,13,,,,,,,,,,
33
  mamba-2.8b-hf,qa1,65,56,40,7,1,,,,,,,
34
  rwkv-6-world-7b,qa1,65,62,53,39,5,,,,,,,
 
56
  ~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
57
  Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
58
  ~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
59
+ ~ ARMT (137M) fine-tune,qa1,100,99.3,99.6,100,100,100,100,100,100,99,99,89
60
  GPT-2 (137M),qa2,21,17,,,,,,,,,,
61
  mamba-2.8b-hf,qa2,68,47,24,8,0,,,,,,,
62
  rwkv-6-world-7b,qa2,42,26,20,16,3,,,,,,,
 
84
  ~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
85
  Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
86
  ~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
87
+ ~ ARMT (137M) fine-tune,qa2,100,96.9,98.8,100,100,100,100,100,100,99,99,84
88
  GPT-2 (137M),qa3,6,8,,,,,,,,,,
89
  mamba-2.8b-hf,qa3,48,39,21,8,0,,,,,,,
90
  rwkv-6-world-7b,qa3,40,45,28,24,4,,,,,,,
 
112
  ~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
113
  Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
114
  ~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
115
+ ~ ARMT (137M) fine-tune,qa3,97,85.2,90.5,92,92,92,91,90,86,80,72,37
116
  GPT-2 (137M),qa4,29,18,,,,,,,,,,
117
  mamba-2.8b-hf,qa4,96,59,47,12,0,,,,,,,
118
  rwkv-6-world-7b,qa4,54,65,57,35,7,,,,,,,
 
140
  ~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
141
  Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
142
  ~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
143
+ ~ ARMT (137M) fine-tune,qa4,100,99.4,99.8,100,100,100,100,100,100,100,100,92
144
  GPT-2 (137M),qa5,45,19,,,,,,,,,,
145
  mamba-2.8b-hf,qa5,75,58,43,9,0,,,,,,,
146
  rwkv-6-world-7b,qa5,79,77,80,61,14,,,,,,,
 
168
  ~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
169
  Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
170
  ~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44
171
+ ~ ARMT (137M) fine-tune,qa5,99.6,98.5,99.1,98.1,98.2,98.1,98,97.9,96.9,95.3,93.4,76.6