Spaces:
Running
Running
booydar
committed on
Commit
·
cffcfac
1
Parent(s):
a2572ae
add new results
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- data/BABILong NeurIPS24 Figs - leaderboard.csv +169 -0
- notebooks/test.ipynb +78 -0
- results/01-ai/Yi-34B-200k/qa1/0.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/1000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/16000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/2000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/32000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/4000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/64000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa1/8000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/0.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/1000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/16000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/2000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/32000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/4000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/64000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa2/8000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/0.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/1000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/16000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/2000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/32000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/4000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/64000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa3/8000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/0.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/1000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/16000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/2000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/32000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/4000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/64000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa4/8000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/0.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/1000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/16000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/2000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/32000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/4000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/64000.csv +1 -1
- results/01-ai/Yi-34B-200k/qa5/8000.csv +1 -1
- results/01-ai/Yi-34B/qa1/0.csv +1 -1
- results/01-ai/Yi-34B/qa1/1000.csv +1 -1
- results/01-ai/Yi-34B/qa1/16000.csv +1 -1
- results/01-ai/Yi-34B/qa1/2000.csv +1 -1
- results/01-ai/Yi-34B/qa1/32000.csv +1 -1
- results/01-ai/Yi-34B/qa1/4000.csv +1 -1
- results/01-ai/Yi-34B/qa1/8000.csv +1 -1
- results/01-ai/Yi-34B/qa2/0.csv +1 -1
data/BABILong NeurIPS24 Figs - leaderboard.csv
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,task,0K,1K,2K,4K,8K,16K,32K,64K,128K,512K,1M,10M
|
2 |
+
GPT-2 (137M),avg,27,15,,,,,,,,,,
|
3 |
+
mamba-2.8b-hf,avg,70,52,35,9,0,,,,,,,
|
4 |
+
rwkv-6-world-7b,avg,56,55,48,35,7,,,,,,,
|
5 |
+
v5-Eagle-7B-HF,avg,62,54,48,41,2,,,,,,,
|
6 |
+
Meta-Llama-3-8B-Instruct,avg,64,60,58,50,44,,,,,,,
|
7 |
+
LLaMA-2-7B-32K,avg,41,53,45,40,39,32,3,,,,,
|
8 |
+
longchat-7b-v1.5-32k,avg,46,42,40,41,42,39,5,,,,,
|
9 |
+
LongAlpaca-13B,avg,48,47,46,43,40,36,4,,,,,
|
10 |
+
Llama-2-7B-32K-Instruct,avg,49,52,49,43,40,35,5,,,,,
|
11 |
+
01-ai/Yi-34B,avg,72,52,43,37,38,31,4,,,,,
|
12 |
+
Mistral-7b-Instruct-v0.2,avg,60,56,52,49,45,42,37,,,,,
|
13 |
+
Mixtral-8x7B-Instruct-v0.1,avg,65,63,60,55,50,46,40,,,,,
|
14 |
+
01-ai/Yi-34B-200k,avg,65,59,56,54,52,50,48,48,,,,
|
15 |
+
Mixtral-8x22B-Instruct-v0.1,avg,75,73,70,65,58,51,43,35,,,,
|
16 |
+
activation-beacon-llama2-7b-chat,avg,55,52,47,43,36,23,16,8,6,,,
|
17 |
+
Yarn-Mistral-7b-128k,avg,51,52,43,40,38,30,16,10,9,,,
|
18 |
+
chatglm3-6b-128k,avg,56,55,51,48,46,41,36,21,13,,,
|
19 |
+
activation-beacon-mistral-7b,avg,59,56,51,48,43,37,36,27,14,,,
|
20 |
+
01-ai/Yi-9B-200k,avg,52,55,48,46,45,36,37,29,24,,,
|
21 |
+
Phi-3-mini-128k-instruct,avg,64,57,55,51,50,46,42,37,7,,,
|
22 |
+
ai21labs/Jamba-v0.1,avg,65,53,50,48,46,45,41,40,34,,,
|
23 |
+
c4ai-command-r-v01,avg,64,64,63,61,59,52,51,46,38,,,
|
24 |
+
Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
|
25 |
+
GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
|
26 |
+
~ Mamba (130M) fine-tune,avg,,,,"98,7","98,5","98,5","98,1",97,"92,5",,,
|
27 |
+
Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
|
28 |
+
~ RMT (137M) fine-tune,avg,"99,36","97,4","94,66","92,32","89,9","85,62","77,88","69,86","58,52","46,36","42,84","33,78"
|
29 |
+
~ ARMT (137M) fine-tune,avg,"99,32",,,"98,1","98,2","98,1",98,"97,9","96,9","95,3","93,4","76,6"
|
30 |
+
GPT-2 (137M),qa1,35,13,,,,,,,,,,
|
31 |
+
mamba-2.8b-hf,qa1,65,56,40,7,1,,,,,,,
|
32 |
+
rwkv-6-world-7b,qa1,65,62,53,39,5,,,,,,,
|
33 |
+
v5-Eagle-7B-HF,qa1,68,58,52,36,3,,,,,,,
|
34 |
+
Meta-Llama-3-8B-Instruct,qa1,98,93,90,79,62,,,,,,,
|
35 |
+
LLaMA-2-7B-32K,qa1,54,57,33,26,34,32,3,,,,,
|
36 |
+
longchat-7b-v1.5-32k,qa1,52,60,56,55,50,42,4,,,,,
|
37 |
+
LongAlpaca-13B,qa1,58,55,58,50,37,23,2,,,,,
|
38 |
+
Llama-2-7B-32K-Instruct,qa1,65,61,52,41,35,23,3,,,,,
|
39 |
+
01-ai/Yi-34B,qa1,99,59,51,34,46,31,4,,,,,
|
40 |
+
Mistral-7b-Instruct-v0.2,qa1,92,86,75,64,63,57,45,,,,,
|
41 |
+
Mixtral-8x7B-Instruct-v0.1,qa1,99,92,84,77,65,53,49,,,,,
|
42 |
+
01-ai/Yi-34B-200k,qa1,85,73,68,66,63,65,62,60,,,,
|
43 |
+
Mixtral-8x22B-Instruct-v0.1,qa1,100,99,95,89,79,63,40,38,,,,
|
44 |
+
activation-beacon-llama2-7b-chat,qa1,85,81,67,65,48,21,16,6,5,,,
|
45 |
+
Yarn-Mistral-7b-128k,qa1,61,71,58,45,51,34,21,8,8,,,
|
46 |
+
chatglm3-6b-128k,qa1,82,77,74,72,67,56,47,13,13,,,
|
47 |
+
activation-beacon-mistral-7b,qa1,92,86,73,59,47,42,42,27,9,,,
|
48 |
+
01-ai/Yi-9B-200k,qa1,33,82,69,59,56,47,44,32,29,,,
|
49 |
+
Phi-3-mini-128k-instruct,qa1,97,84,72,69,70,60,53,38,1,,,
|
50 |
+
ai21labs/Jamba-v0.1,qa1,90,72,66,63,65,58,50,49,38,,,
|
51 |
+
c4ai-command-r-v01,qa1,98,95,94,91,89,68,70,50,30,,,
|
52 |
+
Phi-3-medium-128k-instruct,qa1,100,93,87,80,81,72,69,58,21,,,
|
53 |
+
GPT-4,qa1,99,100,100,95,93,84,62,58,39,,,
|
54 |
+
~ Mamba (130M) fine-tune,qa1,100,100,100,100,100,100,100,100,100,92,,
|
55 |
+
Llama3-ChatQA-1.5-8B + RAG,qa1,60,62,60,58,58,60,60,56,64,54,55,50
|
56 |
+
~ RMT (137M) fine-tune,qa1,100,100,100,100,100,100,99,96,94,87,84,66
|
57 |
+
~ ARMT (137M) fine-tune,qa1,100,,,100,100,100,100,100,100,99,99,89
|
58 |
+
GPT-2 (137M),qa2,21,17,,,,,,,,,,
|
59 |
+
mamba-2.8b-hf,qa2,68,47,24,8,0,,,,,,,
|
60 |
+
rwkv-6-world-7b,qa2,42,26,20,16,3,,,,,,,
|
61 |
+
v5-Eagle-7B-HF,qa2,43,27,25,19,2,,,,,,,
|
62 |
+
Meta-Llama-3-8B-Instruct,qa2,47,46,49,39,20,,,,,,,
|
63 |
+
LLaMA-2-7B-32K,qa2,37,40,37,26,27,21,2,,,,,
|
64 |
+
longchat-7b-v1.5-32k,qa2,31,19,14,13,18,14,2,,,,,
|
65 |
+
LongAlpaca-13B,qa2,30,30,28,29,26,18,2,,,,,
|
66 |
+
Llama-2-7B-32K-Instruct,qa2,41,40,36,27,20,16,3,,,,,
|
67 |
+
01-ai/Yi-34B,qa2,67,43,32,30,23,15,4,,,,,
|
68 |
+
Mistral-7b-Instruct-v0.2,qa2,46,34,32,22,11,9,7,,,,,
|
69 |
+
Mixtral-8x7B-Instruct-v0.1,qa2,58,51,46,35,27,18,14,,,,,
|
70 |
+
01-ai/Yi-34B-200k,qa2,62,51,46,42,35,32,27,30,,,,
|
71 |
+
Mixtral-8x22B-Instruct-v0.1,qa2,77,65,61,56,48,41,33,11,,,,
|
72 |
+
activation-beacon-llama2-7b-chat,qa2,52,37,28,25,23,11,9,4,2,,,
|
73 |
+
Yarn-Mistral-7b-128k,qa2,47,48,37,30,34,21,12,5,3,,,
|
74 |
+
chatglm3-6b-128k,qa2,51,46,45,39,38,31,24,6,5,,,
|
75 |
+
activation-beacon-mistral-7b,qa2,45,35,32,28,22,14,12,10,2,,,
|
76 |
+
01-ai/Yi-9B-200k,qa2,67,52,43,39,31,25,22,12,8,,,
|
77 |
+
Phi-3-mini-128k-instruct,qa2,57,38,38,36,34,23,22,15,2,,,
|
78 |
+
ai21labs/Jamba-v0.1,qa2,57,43,42,39,37,29,26,20,16,,,
|
79 |
+
c4ai-command-r-v01,qa2,64,58,56,54,50,39,37,32,16,,,
|
80 |
+
Phi-3-medium-128k-instruct,qa2,76,62,58,51,44,41,27,14,11,,,
|
81 |
+
GPT-4,qa2,88,79,72,68,65,59,42,25,25,,,
|
82 |
+
~ Mamba (130M) fine-tune,qa2,98,98,98,98,98,98,98,95,87,,,
|
83 |
+
Llama3-ChatQA-1.5-8B + RAG,qa2,28,25,22,19,14,13,9,7,6,6,2,2
|
84 |
+
~ RMT (137M) fine-tune,qa2,100,100,99,98,97,94,82,59,39,25,22,19
|
85 |
+
~ ARMT (137M) fine-tune,qa2,100,,,100,100,100,100,100,100,99,99,84
|
86 |
+
GPT-2 (137M),qa3,6,8,,,,,,,,,,
|
87 |
+
mamba-2.8b-hf,qa3,48,39,21,8,0,,,,,,,
|
88 |
+
rwkv-6-world-7b,qa3,40,45,28,24,4,,,,,,,
|
89 |
+
v5-Eagle-7B-HF,qa3,43,34,30,40,1,,,,,,,
|
90 |
+
Meta-Llama-3-8B-Instruct,qa3,33,28,30,26,11,,,,,,,
|
91 |
+
LLaMA-2-7B-32K,qa3,32,38,34,28,27,21,1,,,,,
|
92 |
+
longchat-7b-v1.5-32k,qa3,22,16,15,17,21,22,4,,,,,
|
93 |
+
LongAlpaca-13B,qa3,25,26,26,25,24,27,2,,,,,
|
94 |
+
Llama-2-7B-32K-Instruct,qa3,35,36,34,26,23,20,2,,,,,
|
95 |
+
01-ai/Yi-34B,qa3,45,34,24,20,17,12,4,,,,,
|
96 |
+
Mistral-7b-Instruct-v0.2,qa3,36,34,31,30,24,18,12,,,,,
|
97 |
+
Mixtral-8x7B-Instruct-v0.1,qa3,34,32,31,30,27,29,24,,,,,
|
98 |
+
01-ai/Yi-34B-200k,qa3,35,30,27,24,24,22,22,26,,,,
|
99 |
+
Mixtral-8x22B-Instruct-v0.1,qa3,53,56,49,39,31,27,26,26,,,,
|
100 |
+
activation-beacon-llama2-7b-chat,qa3,33,25,25,21,20,17,13,5,5,,,
|
101 |
+
Yarn-Mistral-7b-128k,qa3,31,36,33,32,27,25,9,13,7,,,
|
102 |
+
chatglm3-6b-128k,qa3,33,37,31,31,27,25,23,17,9,,,
|
103 |
+
activation-beacon-mistral-7b,qa3,36,33,25,21,18,15,15,15,16,,,
|
104 |
+
01-ai/Yi-9B-200k,qa3,34,33,29,24,25,21,20,20,8,,,
|
105 |
+
Phi-3-mini-128k-instruct,qa3,32,41,31,27,26,24,21,22,4,,,
|
106 |
+
ai21labs/Jamba-v0.1,qa3,32,31,29,26,24,22,22,21,26,,,
|
107 |
+
c4ai-command-r-v01,qa3,25,28,26,28,26,30,28,33,24,,,
|
108 |
+
Phi-3-medium-128k-instruct,qa3,53,51,45,35,30,30,27,25,17,,,
|
109 |
+
GPT-4,qa3,56,63,57,56,53,45,31,31,32,,,
|
110 |
+
~ Mamba (130M) fine-tune,qa3,97,97,97,97,97,96,95,92,81,,,
|
111 |
+
Llama3-ChatQA-1.5-8B + RAG,qa3,17,18,17,17,16,17,15,13,19,17,10,11
|
112 |
+
~ RMT (137M) fine-tune,qa3,97,94,88,81,73,66,55,55,36,25,22,21
|
113 |
+
~ ARMT (137M) fine-tune,qa3,97,,,92,92,92,91,90,86,80,72,37
|
114 |
+
GPT-2 (137M),qa4,29,18,,,,,,,,,,
|
115 |
+
mamba-2.8b-hf,qa4,96,59,47,12,0,,,,,,,
|
116 |
+
rwkv-6-world-7b,qa4,54,65,57,35,7,,,,,,,
|
117 |
+
v5-Eagle-7B-HF,qa4,79,74,63,55,3,,,,,,,
|
118 |
+
Meta-Llama-3-8B-Instruct,qa4,58,55,50,43,52,,,,,,,
|
119 |
+
LLaMA-2-7B-32K,qa4,26,54,51,51,46,36,3,,,,,
|
120 |
+
longchat-7b-v1.5-32k,qa4,60,55,52,57,57,49,4,,,,,
|
121 |
+
LongAlpaca-13B,qa4,65,61,58,52,50,44,4,,,,,
|
122 |
+
Llama-2-7B-32K-Instruct,qa4,39,52,54,56,55,52,6,,,,,
|
123 |
+
01-ai/Yi-34B,qa4,59,56,51,55,52,43,4,,,,,
|
124 |
+
Mistral-7b-Instruct-v0.2,qa4,54,58,58,60,60,58,54,,,,,
|
125 |
+
Mixtral-8x7B-Instruct-v0.1,qa4,55,60,59,61,63,61,58,,,,,
|
126 |
+
01-ai/Yi-34B-200k,qa4,64,65,64,63,61,56,54,44,,,,
|
127 |
+
Mixtral-8x22B-Instruct-v0.1,qa4,56,62,59,62,62,60,54,39,,,,
|
128 |
+
activation-beacon-llama2-7b-chat,qa4,40,50,52,43,34,22,14,9,10,,,
|
129 |
+
Yarn-Mistral-7b-128k,qa4,60,56,43,45,32,31,16,7,8,,,
|
130 |
+
chatglm3-6b-128k,qa4,45,48,42,38,35,32,27,13,11,,,
|
131 |
+
activation-beacon-mistral-7b,qa4,53,58,60,57,53,50,45,29,15,,,
|
132 |
+
01-ai/Yi-9B-200k,qa4,49,47,50,50,54,43,45,36,33,,,
|
133 |
+
Phi-3-mini-128k-instruct,qa4,54,56,56,50,49,50,45,47,5,,,
|
134 |
+
ai21labs/Jamba-v0.1,qa4,64,50,49,49,48,52,46,49,38,,,
|
135 |
+
c4ai-command-r-v01,qa4,46,58,59,54,56,46,46,47,52,,,
|
136 |
+
Phi-3-medium-128k-instruct,qa4,54,61,63,64,64,61,59,52,33,,,
|
137 |
+
GPT-4,qa4,98,70,63,60,52,47,46,40,32,,,
|
138 |
+
~ Mamba (130M) fine-tune,qa4,100,100,100,100,100,100,99,100,98,,,
|
139 |
+
Llama3-ChatQA-1.5-8B + RAG,qa4,53,58,56,59,57,60,60,59,60,59,54,56
|
140 |
+
~ RMT (137M) fine-tune,qa4,100,94,87,83,80,75,64,51,38,26,24,20
|
141 |
+
~ ARMT (137M) fine-tune,qa4,100,,,100,100,100,100,100,100,100,100,92
|
142 |
+
GPT-2 (137M),qa5,45,19,,,,,,,,,,
|
143 |
+
mamba-2.8b-hf,qa5,75,58,43,9,0,,,,,,,
|
144 |
+
rwkv-6-world-7b,qa5,79,77,80,61,14,,,,,,,
|
145 |
+
v5-Eagle-7B-HF,qa5,75,76,71,57,3,,,,,,,
|
146 |
+
Meta-Llama-3-8B-Instruct,qa5,85,78,73,65,73,,,,,,,
|
147 |
+
LLaMA-2-7B-32K,qa5,55,74,70,67,59,51,7,,,,,
|
148 |
+
longchat-7b-v1.5-32k,qa5,63,62,62,65,66,67,9,,,,,
|
149 |
+
LongAlpaca-13B,qa5,63,61,61,61,62,66,12,,,,,
|
150 |
+
Llama-2-7B-32K-Instruct,qa5,63,69,69,67,66,63,9,,,,,
|
151 |
+
01-ai/Yi-34B,qa5,88,70,59,48,53,55,4,,,,,
|
152 |
+
Mistral-7b-Instruct-v0.2,qa5,70,66,66,67,69,67,67,,,,,
|
153 |
+
Mixtral-8x7B-Instruct-v0.1,qa5,80,79,80,73,66,67,56,,,,,
|
154 |
+
01-ai/Yi-34B-200k,qa5,78,77,77,76,76,75,76,80,,,,
|
155 |
+
Mixtral-8x22B-Instruct-v0.1,qa5,87,84,84,79,69,64,63,63,,,,
|
156 |
+
activation-beacon-llama2-7b-chat,qa5,65,67,64,63,57,45,29,17,7,,,
|
157 |
+
Yarn-Mistral-7b-128k,qa5,58,47,45,47,47,38,23,17,19,,,
|
158 |
+
chatglm3-6b-128k,qa5,70,69,64,60,61,61,58,55,26,,,
|
159 |
+
activation-beacon-mistral-7b,qa5,68,66,66,74,74,66,67,55,28,,,
|
160 |
+
01-ai/Yi-9B-200k,qa5,76,59,50,57,57,45,52,47,40,,,
|
161 |
+
Phi-3-mini-128k-instruct,qa5,79,66,76,72,72,73,71,64,23,,,
|
162 |
+
ai21labs/Jamba-v0.1,qa5,83,70,64,62,58,64,63,60,50,,,
|
163 |
+
c4ai-command-r-v01,qa5,86,82,81,78,75,79,72,70,66,,,
|
164 |
+
Phi-3-medium-128k-instruct,qa5,77,85,84,81,82,82,81,78,69,,,
|
165 |
+
GPT-4,qa5,96,95,92,90,93,85,82,60,51,,,
|
166 |
+
~ Mamba (130M) fine-tune,qa5,98,99,98,99,99,99,98,99,98,,,
|
167 |
+
Llama3-ChatQA-1.5-8B + RAG,qa5,80,77,78,77,78,77,78,76,75,75,76,67
|
168 |
+
~ RMT (137M) fine-tune,qa5,100,100,99,99,99,94,90,89,86,69,63,44
|
169 |
+
~ ARMT (137M) fine-tune,qa5,"99,6",,,"98,1","98,2","98,1",98,"97,9","96,9","95,3","93,4","76,6"
|
notebooks/test.ipynb
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import os"
|
11 |
+
]
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"cell_type": "code",
|
15 |
+
"execution_count": 24,
|
16 |
+
"metadata": {},
|
17 |
+
"outputs": [],
|
18 |
+
"source": [
|
19 |
+
"res_path = '../results'"
|
20 |
+
]
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"cell_type": "code",
|
24 |
+
"execution_count": 25,
|
25 |
+
"metadata": {},
|
26 |
+
"outputs": [],
|
27 |
+
"source": [
|
28 |
+
"p = \"/home/jovyan/rmt/babilong-leaderboard/data/BABILong NeurIPS24 Figs - leaderboard.csv\"\n",
|
29 |
+
"res_df = pd.read_csv(p)\n",
|
30 |
+
"res_df = res_df[res_df.task.isin(['qa1', 'qa2', 'qa3', 'qa4', 'qa5'])]"
|
31 |
+
]
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"cell_type": "code",
|
35 |
+
"execution_count": 30,
|
36 |
+
"metadata": {},
|
37 |
+
"outputs": [],
|
38 |
+
"source": [
|
39 |
+
"lens = [0, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]\n",
|
40 |
+
"len_names = ['0K', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '512K', '1M', '10M']\n",
|
41 |
+
"\n",
|
42 |
+
"for model_name in res_df.Model.unique():\n",
|
43 |
+
" model_df = res_df[res_df.Model == model_name]\n",
|
44 |
+
" for i, row in model_df.iterrows():\n",
|
45 |
+
" for l, ln in zip(lens, len_names):\n",
|
46 |
+
" score = row[ln]\n",
|
47 |
+
" # print(score)\n",
|
48 |
+
" if not pd.isna(score):\n",
|
49 |
+
" os.makedirs(os.path.join(res_path, model_name), exist_ok=True)\n",
|
50 |
+
" os.makedirs(os.path.join(res_path, model_name, row.task), exist_ok=True)\n",
|
51 |
+
" path = os.path.join(res_path, model_name, row.task, f'{l}.csv')\n",
|
52 |
+
" df = pd.DataFrame([{'result': score}])\n",
|
53 |
+
" df.to_csv(path, index=False)"
|
54 |
+
]
|
55 |
+
}
|
56 |
+
],
|
57 |
+
"metadata": {
|
58 |
+
"kernelspec": {
|
59 |
+
"display_name": "Python 3",
|
60 |
+
"language": "python",
|
61 |
+
"name": "python3"
|
62 |
+
},
|
63 |
+
"language_info": {
|
64 |
+
"codemirror_mode": {
|
65 |
+
"name": "ipython",
|
66 |
+
"version": 3
|
67 |
+
},
|
68 |
+
"file_extension": ".py",
|
69 |
+
"mimetype": "text/x-python",
|
70 |
+
"name": "python",
|
71 |
+
"nbconvert_exporter": "python",
|
72 |
+
"pygments_lexer": "ipython3",
|
73 |
+
"version": "3.10.13"
|
74 |
+
}
|
75 |
+
},
|
76 |
+
"nbformat": 4,
|
77 |
+
"nbformat_minor": 2
|
78 |
+
}
|
results/01-ai/Yi-34B-200k/qa1/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
85
|
|
|
1 |
result
|
2 |
+
85.0
|
results/01-ai/Yi-34B-200k/qa1/1000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
73
|
|
|
1 |
result
|
2 |
+
73.0
|
results/01-ai/Yi-34B-200k/qa1/16000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
65
|
|
|
1 |
result
|
2 |
+
65.0
|
results/01-ai/Yi-34B-200k/qa1/2000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
68
|
|
|
1 |
result
|
2 |
+
68.0
|
results/01-ai/Yi-34B-200k/qa1/32000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
62
|
|
|
1 |
result
|
2 |
+
62.0
|
results/01-ai/Yi-34B-200k/qa1/4000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
66
|
|
|
1 |
result
|
2 |
+
66.0
|
results/01-ai/Yi-34B-200k/qa1/64000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
60
|
|
|
1 |
result
|
2 |
+
60.0
|
results/01-ai/Yi-34B-200k/qa1/8000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
63
|
|
|
1 |
result
|
2 |
+
63.0
|
results/01-ai/Yi-34B-200k/qa2/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
62
|
|
|
1 |
result
|
2 |
+
62.0
|
results/01-ai/Yi-34B-200k/qa2/1000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
51
|
|
|
1 |
result
|
2 |
+
51.0
|
results/01-ai/Yi-34B-200k/qa2/16000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
32
|
|
|
1 |
result
|
2 |
+
32.0
|
results/01-ai/Yi-34B-200k/qa2/2000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
46
|
|
|
1 |
result
|
2 |
+
46.0
|
results/01-ai/Yi-34B-200k/qa2/32000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
27
|
|
|
1 |
result
|
2 |
+
27.0
|
results/01-ai/Yi-34B-200k/qa2/4000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
42
|
|
|
1 |
result
|
2 |
+
42.0
|
results/01-ai/Yi-34B-200k/qa2/64000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
30
|
|
|
1 |
result
|
2 |
+
30.0
|
results/01-ai/Yi-34B-200k/qa2/8000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
35
|
|
|
1 |
result
|
2 |
+
35.0
|
results/01-ai/Yi-34B-200k/qa3/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
35
|
|
|
1 |
result
|
2 |
+
35.0
|
results/01-ai/Yi-34B-200k/qa3/1000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
30
|
|
|
1 |
result
|
2 |
+
30.0
|
results/01-ai/Yi-34B-200k/qa3/16000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
22
|
|
|
1 |
result
|
2 |
+
22.0
|
results/01-ai/Yi-34B-200k/qa3/2000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
27
|
|
|
1 |
result
|
2 |
+
27.0
|
results/01-ai/Yi-34B-200k/qa3/32000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
22
|
|
|
1 |
result
|
2 |
+
22.0
|
results/01-ai/Yi-34B-200k/qa3/4000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
24
|
|
|
1 |
result
|
2 |
+
24.0
|
results/01-ai/Yi-34B-200k/qa3/64000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
26
|
|
|
1 |
result
|
2 |
+
26.0
|
results/01-ai/Yi-34B-200k/qa3/8000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
24
|
|
|
1 |
result
|
2 |
+
24.0
|
results/01-ai/Yi-34B-200k/qa4/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
64
|
|
|
1 |
result
|
2 |
+
64.0
|
results/01-ai/Yi-34B-200k/qa4/1000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
65
|
|
|
1 |
result
|
2 |
+
65.0
|
results/01-ai/Yi-34B-200k/qa4/16000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
56
|
|
|
1 |
result
|
2 |
+
56.0
|
results/01-ai/Yi-34B-200k/qa4/2000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
64
|
|
|
1 |
result
|
2 |
+
64.0
|
results/01-ai/Yi-34B-200k/qa4/32000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
54
|
|
|
1 |
result
|
2 |
+
54.0
|
results/01-ai/Yi-34B-200k/qa4/4000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
63
|
|
|
1 |
result
|
2 |
+
63.0
|
results/01-ai/Yi-34B-200k/qa4/64000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
44
|
|
|
1 |
result
|
2 |
+
44.0
|
results/01-ai/Yi-34B-200k/qa4/8000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
61
|
|
|
1 |
result
|
2 |
+
61.0
|
results/01-ai/Yi-34B-200k/qa5/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
78
|
|
|
1 |
result
|
2 |
+
78.0
|
results/01-ai/Yi-34B-200k/qa5/1000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
77
|
|
|
1 |
result
|
2 |
+
77.0
|
results/01-ai/Yi-34B-200k/qa5/16000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
75
|
|
|
1 |
result
|
2 |
+
75.0
|
results/01-ai/Yi-34B-200k/qa5/2000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
77
|
|
|
1 |
result
|
2 |
+
77.0
|
results/01-ai/Yi-34B-200k/qa5/32000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
76
|
|
|
1 |
result
|
2 |
+
76.0
|
results/01-ai/Yi-34B-200k/qa5/4000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
76
|
|
|
1 |
result
|
2 |
+
76.0
|
results/01-ai/Yi-34B-200k/qa5/64000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
80
|
|
|
1 |
result
|
2 |
+
80.0
|
results/01-ai/Yi-34B-200k/qa5/8000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
76
|
|
|
1 |
result
|
2 |
+
76.0
|
results/01-ai/Yi-34B/qa1/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
99
|
|
|
1 |
result
|
2 |
+
99.0
|
results/01-ai/Yi-34B/qa1/1000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
59
|
|
|
1 |
result
|
2 |
+
59.0
|
results/01-ai/Yi-34B/qa1/16000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
31
|
|
|
1 |
result
|
2 |
+
31.0
|
results/01-ai/Yi-34B/qa1/2000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
51
|
|
|
1 |
result
|
2 |
+
51.0
|
results/01-ai/Yi-34B/qa1/32000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
4
|
|
|
1 |
result
|
2 |
+
4.0
|
results/01-ai/Yi-34B/qa1/4000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
34
|
|
|
1 |
result
|
2 |
+
34.0
|
results/01-ai/Yi-34B/qa1/8000.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
46
|
|
|
1 |
result
|
2 |
+
46.0
|
results/01-ai/Yi-34B/qa2/0.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
result
|
2 |
-
67
|
|
|
1 |
result
|
2 |
+
67.0
|