booydar commited on
Commit
275a638
·
verified ·
1 Parent(s): 3aac311

Clarify naming

Browse files
Files changed (1) hide show
  1. data/leaderboard-v0_results.csv +6 -6
data/leaderboard-v0_results.csv CHANGED
@@ -23,7 +23,7 @@ ai21labs/Jamba-v0.1,avg,65,53,50,48,46,45,41,40,34,,,
23
  c4ai-command-r-v01,avg,64,64,63,61,59,52,51,46,38,,,
24
  Meta-Llama-3.1-8B-Instruct,avg,67,68,66,66,62,60,56,49,39,,,
25
  Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
26
- GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
27
  Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
28
  ~ Mamba (130M) fine-tune,avg,98.46,98.6,98.7,98.7,98.5,98.5,98.1,97,92.5,,,
29
  Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
@@ -52,7 +52,7 @@ Phi-3-mini-128k-instruct,qa1,97,84,72,69,70,60,53,38,1,,,
52
  ai21labs/Jamba-v0.1,qa1,90,72,66,63,65,58,50,49,38,,,
53
  c4ai-command-r-v01,qa1,98,95,94,91,89,68,70,50,30,,,
54
  Phi-3-medium-128k-instruct,qa1,100,93,87,80,81,72,69,58,21,,,
55
- GPT-4,qa1,99,100,100,95,93,84,62,58,39,,,
56
  ~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
57
  Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
58
  ~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
@@ -80,7 +80,7 @@ Phi-3-mini-128k-instruct,qa2,57,38,38,36,34,23,22,15,2,,,
80
  ai21labs/Jamba-v0.1,qa2,57,43,42,39,37,29,26,20,16,,,
81
  c4ai-command-r-v01,qa2,64,58,56,54,50,39,37,32,16,,,
82
  Phi-3-medium-128k-instruct,qa2,76,62,58,51,44,41,27,14,11,,,
83
- GPT-4,qa2,88,79,72,68,65,59,42,25,25,,,
84
  ~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
85
  Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
86
  ~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
@@ -108,7 +108,7 @@ Phi-3-mini-128k-instruct,qa3,32,41,31,27,26,24,21,22,4,,,
108
  ai21labs/Jamba-v0.1,qa3,32,31,29,26,24,22,22,21,26,,,
109
  c4ai-command-r-v01,qa3,25,28,26,28,26,30,28,33,24,,,
110
  Phi-3-medium-128k-instruct,qa3,53,51,45,35,30,30,27,25,17,,,
111
- GPT-4,qa3,56,63,57,56,53,45,31,31,32,,,
112
  ~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
113
  Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
114
  ~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
@@ -136,7 +136,7 @@ Phi-3-mini-128k-instruct,qa4,54,56,56,50,49,50,45,47,5,,,
136
  ai21labs/Jamba-v0.1,qa4,64,50,49,49,48,52,46,49,38,,,
137
  c4ai-command-r-v01,qa4,46,58,59,54,56,46,46,47,52,,,
138
  Phi-3-medium-128k-instruct,qa4,54,61,63,64,64,61,59,52,33,,,
139
- GPT-4,qa4,98,70,63,60,52,47,46,40,32,,,
140
  ~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
141
  Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
142
  ~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
@@ -164,7 +164,7 @@ Phi-3-mini-128k-instruct,qa5,79,66,76,72,72,73,71,64,23,,,
164
  ai21labs/Jamba-v0.1,qa5,83,70,64,62,58,64,63,60,50,,,
165
  c4ai-command-r-v01,qa5,86,82,81,78,75,79,72,70,66,,,
166
  Phi-3-medium-128k-instruct,qa5,77,85,84,81,82,82,81,78,69,,,
167
- GPT-4,qa5,96,95,92,90,93,85,82,60,51,,,
168
  ~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
169
  Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
170
  ~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44
 
23
  c4ai-command-r-v01,avg,64,64,63,61,59,52,51,46,38,,,
24
  Meta-Llama-3.1-8B-Instruct,avg,67,68,66,66,62,60,56,49,39,,,
25
  Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
26
+ gpt-4-0125-preview,avg,87,81,77,74,71,64,53,43,36,,,
27
  Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
28
  ~ Mamba (130M) fine-tune,avg,98.46,98.6,98.7,98.7,98.5,98.5,98.1,97,92.5,,,
29
  Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
 
52
  ai21labs/Jamba-v0.1,qa1,90,72,66,63,65,58,50,49,38,,,
53
  c4ai-command-r-v01,qa1,98,95,94,91,89,68,70,50,30,,,
54
  Phi-3-medium-128k-instruct,qa1,100,93,87,80,81,72,69,58,21,,,
55
+ gpt-4-0125-preview,qa1,99,100,100,95,93,84,62,58,39,,,
56
  ~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
57
  Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
58
  ~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
 
80
  ai21labs/Jamba-v0.1,qa2,57,43,42,39,37,29,26,20,16,,,
81
  c4ai-command-r-v01,qa2,64,58,56,54,50,39,37,32,16,,,
82
  Phi-3-medium-128k-instruct,qa2,76,62,58,51,44,41,27,14,11,,,
83
+ gpt-4-0125-preview,qa2,88,79,72,68,65,59,42,25,25,,,
84
  ~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
85
  Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
86
  ~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
 
108
  ai21labs/Jamba-v0.1,qa3,32,31,29,26,24,22,22,21,26,,,
109
  c4ai-command-r-v01,qa3,25,28,26,28,26,30,28,33,24,,,
110
  Phi-3-medium-128k-instruct,qa3,53,51,45,35,30,30,27,25,17,,,
111
+ gpt-4-0125-preview,qa3,56,63,57,56,53,45,31,31,32,,,
112
  ~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
113
  Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
114
  ~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
 
136
  ai21labs/Jamba-v0.1,qa4,64,50,49,49,48,52,46,49,38,,,
137
  c4ai-command-r-v01,qa4,46,58,59,54,56,46,46,47,52,,,
138
  Phi-3-medium-128k-instruct,qa4,54,61,63,64,64,61,59,52,33,,,
139
+ gpt-4-0125-preview,qa4,98,70,63,60,52,47,46,40,32,,,
140
  ~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
141
  Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
142
  ~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
 
164
  ai21labs/Jamba-v0.1,qa5,83,70,64,62,58,64,63,60,50,,,
165
  c4ai-command-r-v01,qa5,86,82,81,78,75,79,72,70,66,,,
166
  Phi-3-medium-128k-instruct,qa5,77,85,84,81,82,82,81,78,69,,,
167
+ gpt-4-0125-preview,qa5,96,95,92,90,93,85,82,60,51,,,
168
  ~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
169
  Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
170
  ~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44