Fill in missing result scores
Browse files
data/leaderboard-v0_results.csv
CHANGED
@@ -25,10 +25,10 @@ Meta-Llama-3.1-8B-Instruct,avg,67,68,66,66,62,60,56,49,39,,,
|
|
25 |
Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
|
26 |
GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
|
27 |
Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
|
28 |
-
~ Mamba (130M) fine-tune,avg
|
29 |
Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
|
30 |
~ RMT (137M) fine-tune,avg,99.36,97.4,94.66,92.32,89.9,85.62,77.88,69.86,58.52,46.36,42.84,33.78
|
31 |
-
~ ARMT (137M) fine-tune,avg,99.32
|
32 |
GPT-2 (137M),qa1,35,13,,,,,,,,,,
|
33 |
mamba-2.8b-hf,qa1,65,56,40,7,1,,,,,,,
|
34 |
rwkv-6-world-7b,qa1,65,62,53,39,5,,,,,,,
|
@@ -56,7 +56,7 @@ GPT-4,qa1,99,100,100,95,93,84,62,58,39,,,
|
|
56 |
~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
|
57 |
Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
|
58 |
~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
|
59 |
-
~ ARMT (137M) fine-tune,qa1,100
|
60 |
GPT-2 (137M),qa2,21,17,,,,,,,,,,
|
61 |
mamba-2.8b-hf,qa2,68,47,24,8,0,,,,,,,
|
62 |
rwkv-6-world-7b,qa2,42,26,20,16,3,,,,,,,
|
@@ -84,7 +84,7 @@ GPT-4,qa2,88,79,72,68,65,59,42,25,25,,,
|
|
84 |
~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
|
85 |
Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
|
86 |
~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
|
87 |
-
~ ARMT (137M) fine-tune,qa2,100
|
88 |
GPT-2 (137M),qa3,6,8,,,,,,,,,,
|
89 |
mamba-2.8b-hf,qa3,48,39,21,8,0,,,,,,,
|
90 |
rwkv-6-world-7b,qa3,40,45,28,24,4,,,,,,,
|
@@ -112,7 +112,7 @@ GPT-4,qa3,56,63,57,56,53,45,31,31,32,,,
|
|
112 |
~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
|
113 |
Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
|
114 |
~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
|
115 |
-
~ ARMT (137M) fine-tune,qa3,97
|
116 |
GPT-2 (137M),qa4,29,18,,,,,,,,,,
|
117 |
mamba-2.8b-hf,qa4,96,59,47,12,0,,,,,,,
|
118 |
rwkv-6-world-7b,qa4,54,65,57,35,7,,,,,,,
|
@@ -140,7 +140,7 @@ GPT-4,qa4,98,70,63,60,52,47,46,40,32,,,
|
|
140 |
~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
|
141 |
Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
|
142 |
~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
|
143 |
-
~ ARMT (137M) fine-tune,qa4,100
|
144 |
GPT-2 (137M),qa5,45,19,,,,,,,,,,
|
145 |
mamba-2.8b-hf,qa5,75,58,43,9,0,,,,,,,
|
146 |
rwkv-6-world-7b,qa5,79,77,80,61,14,,,,,,,
|
@@ -168,4 +168,4 @@ GPT-4,qa5,96,95,92,90,93,85,82,60,51,,,
|
|
168 |
~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
|
169 |
Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
|
170 |
~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44
|
171 |
-
~ ARMT (137M) fine-tune,qa5,99.6
|
|
|
25 |
Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
|
26 |
GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
|
27 |
Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
|
28 |
+
~ Mamba (130M) fine-tune,avg,98.46,98.6,98.7,98.7,98.5,98.5,98.1,97,92.5,,,
|
29 |
Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
|
30 |
~ RMT (137M) fine-tune,avg,99.36,97.4,94.66,92.32,89.9,85.62,77.88,69.86,58.52,46.36,42.84,33.78
|
31 |
+
~ ARMT (137M) fine-tune,avg,99.32,95.9,97.6,98.1,98.2,98.1,98,97.9,96.9,95.3,93.4,76.6
|
32 |
GPT-2 (137M),qa1,35,13,,,,,,,,,,
|
33 |
mamba-2.8b-hf,qa1,65,56,40,7,1,,,,,,,
|
34 |
rwkv-6-world-7b,qa1,65,62,53,39,5,,,,,,,
|
|
|
56 |
~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
|
57 |
Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
|
58 |
~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
|
59 |
+
~ ARMT (137M) fine-tune,qa1,100,99.3,99.6,100,100,100,100,100,100,99,99,89
|
60 |
GPT-2 (137M),qa2,21,17,,,,,,,,,,
|
61 |
mamba-2.8b-hf,qa2,68,47,24,8,0,,,,,,,
|
62 |
rwkv-6-world-7b,qa2,42,26,20,16,3,,,,,,,
|
|
|
84 |
~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
|
85 |
Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
|
86 |
~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
|
87 |
+
~ ARMT (137M) fine-tune,qa2,100,96.9,98.8,100,100,100,100,100,100,99,99,84
|
88 |
GPT-2 (137M),qa3,6,8,,,,,,,,,,
|
89 |
mamba-2.8b-hf,qa3,48,39,21,8,0,,,,,,,
|
90 |
rwkv-6-world-7b,qa3,40,45,28,24,4,,,,,,,
|
|
|
112 |
~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
|
113 |
Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
|
114 |
~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
|
115 |
+
~ ARMT (137M) fine-tune,qa3,97,85.2,90.5,92,92,92,91,90,86,80,72,37
|
116 |
GPT-2 (137M),qa4,29,18,,,,,,,,,,
|
117 |
mamba-2.8b-hf,qa4,96,59,47,12,0,,,,,,,
|
118 |
rwkv-6-world-7b,qa4,54,65,57,35,7,,,,,,,
|
|
|
140 |
~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
|
141 |
Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
|
142 |
~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
|
143 |
+
~ ARMT (137M) fine-tune,qa4,100,99.4,99.8,100,100,100,100,100,100,100,100,92
|
144 |
GPT-2 (137M),qa5,45,19,,,,,,,,,,
|
145 |
mamba-2.8b-hf,qa5,75,58,43,9,0,,,,,,,
|
146 |
rwkv-6-world-7b,qa5,79,77,80,61,14,,,,,,,
|
|
|
168 |
~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
|
169 |
Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
|
170 |
~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44
|
171 |
+
~ ARMT (137M) fine-tune,qa5,99.6,98.5,99.1,98.1,98.2,98.1,98,97.9,96.9,95.3,93.4,76.6
|