Spaces:
Running
Running
Julien Simon
committed on
Commit
•
633e287
1
Parent(s):
eeaeace
Fix case
Browse files
- results_arcee_fcv1.py +9 -9
- results_arcee_nova.py +3 -3
- results_arcee_supernova.py +5 -5
- results_arcee_supernova_medius.py +9 -9
results_arcee_fcv1.py
CHANGED
@@ -14,7 +14,7 @@ results_arcee_fcv1 = {
|
|
14 |
},
|
15 |
{
|
16 |
"instanceType": "g5.12xlarge",
|
17 |
-
"quantization": "
|
18 |
"container": "LMI 0.30+vLLM 0.6.2",
|
19 |
"status": "OK",
|
20 |
"tokensPerSecond": "23",
|
@@ -22,7 +22,7 @@ results_arcee_fcv1 = {
|
|
22 |
},
|
23 |
{
|
24 |
"instanceType": "g6.12xlarge",
|
25 |
-
"quantization": "
|
26 |
"container": "LMI 0.30+vLLM 0.6.2",
|
27 |
"status": "OK",
|
28 |
"tokensPerSecond": "14",
|
@@ -30,7 +30,7 @@ results_arcee_fcv1 = {
|
|
30 |
},
|
31 |
{
|
32 |
"instanceType": "g6e.12xlarge",
|
33 |
-
"quantization": "
|
34 |
"container": "vLLM0.6.4.post1",
|
35 |
"status": "OK",
|
36 |
"tokensPerSecond": "36",
|
@@ -38,7 +38,7 @@ results_arcee_fcv1 = {
|
|
38 |
},
|
39 |
{
|
40 |
"instanceType": "g6e.12xlarge (2 GPUs)",
|
41 |
-
"quantization": "
|
42 |
"container": "vLLM 0.6.4.post1",
|
43 |
"status": "OK",
|
44 |
"tokensPerSecond": "21",
|
@@ -46,7 +46,7 @@ results_arcee_fcv1 = {
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "p4d.24xlarge",
|
49 |
-
"quantization": "
|
50 |
"container": "LMI 0.30+vLLM 0.6.2",
|
51 |
"status": "OK",
|
52 |
"tokensPerSecond": "72.5",
|
@@ -54,7 +54,7 @@ results_arcee_fcv1 = {
|
|
54 |
},
|
55 |
{
|
56 |
"instanceType": "p5.48xlarge",
|
57 |
-
"quantization": "
|
58 |
"container": "vLLM 0.6.4.post1",
|
59 |
"status": "OK",
|
60 |
"tokensPerSecond": "117",
|
@@ -62,7 +62,7 @@ results_arcee_fcv1 = {
|
|
62 |
},
|
63 |
{
|
64 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
65 |
-
"quantization": "
|
66 |
"container": "vLLM 0.6.4.post1",
|
67 |
"status": "OK",
|
68 |
"tokensPerSecond": "88",
|
@@ -70,7 +70,7 @@ results_arcee_fcv1 = {
|
|
70 |
},
|
71 |
{
|
72 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
73 |
-
"quantization": "
|
74 |
"container": "vLLM 0.6.4.post1",
|
75 |
"status": "OK",
|
76 |
"tokensPerSecond": "58",
|
@@ -78,7 +78,7 @@ results_arcee_fcv1 = {
|
|
78 |
},
|
79 |
{
|
80 |
"instanceType": "p5.48xlarge (1 GPU)",
|
81 |
-
"quantization": "
|
82 |
"container": "vLLM 0.6.4.post1",
|
83 |
"status": "OK",
|
84 |
"tokensPerSecond": "38",
|
|
|
14 |
},
|
15 |
{
|
16 |
"instanceType": "g5.12xlarge",
|
17 |
+
"quantization": "none",
|
18 |
"container": "LMI 0.30+vLLM 0.6.2",
|
19 |
"status": "OK",
|
20 |
"tokensPerSecond": "23",
|
|
|
22 |
},
|
23 |
{
|
24 |
"instanceType": "g6.12xlarge",
|
25 |
+
"quantization": "none",
|
26 |
"container": "LMI 0.30+vLLM 0.6.2",
|
27 |
"status": "OK",
|
28 |
"tokensPerSecond": "14",
|
|
|
30 |
},
|
31 |
{
|
32 |
"instanceType": "g6e.12xlarge",
|
33 |
+
"quantization": "none",
|
34 |
"container": "vLLM0.6.4.post1",
|
35 |
"status": "OK",
|
36 |
"tokensPerSecond": "36",
|
|
|
38 |
},
|
39 |
{
|
40 |
"instanceType": "g6e.12xlarge (2 GPUs)",
|
41 |
+
"quantization": "none",
|
42 |
"container": "vLLM 0.6.4.post1",
|
43 |
"status": "OK",
|
44 |
"tokensPerSecond": "21",
|
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "p4d.24xlarge",
|
49 |
+
"quantization": "none",
|
50 |
"container": "LMI 0.30+vLLM 0.6.2",
|
51 |
"status": "OK",
|
52 |
"tokensPerSecond": "72.5",
|
|
|
54 |
},
|
55 |
{
|
56 |
"instanceType": "p5.48xlarge",
|
57 |
+
"quantization": "none",
|
58 |
"container": "vLLM 0.6.4.post1",
|
59 |
"status": "OK",
|
60 |
"tokensPerSecond": "117",
|
|
|
62 |
},
|
63 |
{
|
64 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
65 |
+
"quantization": "none",
|
66 |
"container": "vLLM 0.6.4.post1",
|
67 |
"status": "OK",
|
68 |
"tokensPerSecond": "88",
|
|
|
70 |
},
|
71 |
{
|
72 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
73 |
+
"quantization": "none",
|
74 |
"container": "vLLM 0.6.4.post1",
|
75 |
"status": "OK",
|
76 |
"tokensPerSecond": "58",
|
|
|
78 |
},
|
79 |
{
|
80 |
"instanceType": "p5.48xlarge (1 GPU)",
|
81 |
+
"quantization": "none",
|
82 |
"container": "vLLM 0.6.4.post1",
|
83 |
"status": "OK",
|
84 |
"tokensPerSecond": "38",
|
results_arcee_nova.py
CHANGED
@@ -155,7 +155,7 @@ results_arcee_nova = {
|
|
155 |
},
|
156 |
{
|
157 |
"instanceType": "p5.48xlarge",
|
158 |
-
"quantization": "
|
159 |
"container": "vLLM 0.6.4.post1",
|
160 |
"status": "OK",
|
161 |
"tokensPerSecond": "76",
|
@@ -163,7 +163,7 @@ results_arcee_nova = {
|
|
163 |
},
|
164 |
{
|
165 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
166 |
-
"quantization": "
|
167 |
"container": "vLLM 0.6.4.post1",
|
168 |
"status": "OK",
|
169 |
"tokensPerSecond": "51",
|
@@ -171,7 +171,7 @@ results_arcee_nova = {
|
|
171 |
},
|
172 |
{
|
173 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
174 |
-
"quantization": "
|
175 |
"container": "vLLM 0.6.4.post1",
|
176 |
"status": "OK",
|
177 |
"tokensPerSecond": "32",
|
|
|
155 |
},
|
156 |
{
|
157 |
"instanceType": "p5.48xlarge",
|
158 |
+
"quantization": "none",
|
159 |
"container": "vLLM 0.6.4.post1",
|
160 |
"status": "OK",
|
161 |
"tokensPerSecond": "76",
|
|
|
163 |
},
|
164 |
{
|
165 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
166 |
+
"quantization": "none",
|
167 |
"container": "vLLM 0.6.4.post1",
|
168 |
"status": "OK",
|
169 |
"tokensPerSecond": "51",
|
|
|
171 |
},
|
172 |
{
|
173 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
174 |
+
"quantization": "none",
|
175 |
"container": "vLLM 0.6.4.post1",
|
176 |
"status": "OK",
|
177 |
"tokensPerSecond": "32",
|
results_arcee_supernova.py
CHANGED
@@ -46,7 +46,7 @@ results_arcee_supernova = {
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "g6e.12xlarge",
|
49 |
-
"quantization": "
|
50 |
"container": "vLLM 0.6.3",
|
51 |
"status": "OK",
|
52 |
"tokensPerSecond": "18.6",
|
@@ -186,15 +186,15 @@ results_arcee_supernova = {
|
|
186 |
},
|
187 |
{
|
188 |
"instanceType": "p5.48xlarge",
|
189 |
-
"quantization": "
|
190 |
"container": "vLLM 0.6.4.post1",
|
191 |
-
"status": "
|
192 |
"tokensPerSecond": "77",
|
193 |
"notes": "--tensor-parallel-size 8",
|
194 |
},
|
195 |
{
|
196 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
197 |
-
"quantization": "
|
198 |
"container": "vLLM 0.6.4.post1",
|
199 |
"status": "OK",
|
200 |
"tokensPerSecond": "53",
|
@@ -202,7 +202,7 @@ results_arcee_supernova = {
|
|
202 |
},
|
203 |
{
|
204 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
205 |
-
"quantization": "
|
206 |
"container": "vLLM 0.6.4.post1",
|
207 |
"status": "OK",
|
208 |
"tokensPerSecond": "33",
|
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "g6e.12xlarge",
|
49 |
+
"quantization": "none",
|
50 |
"container": "vLLM 0.6.3",
|
51 |
"status": "OK",
|
52 |
"tokensPerSecond": "18.6",
|
|
|
186 |
},
|
187 |
{
|
188 |
"instanceType": "p5.48xlarge",
|
189 |
+
"quantization": "none",
|
190 |
"container": "vLLM 0.6.4.post1",
|
191 |
+
"status": "OK",
|
192 |
"tokensPerSecond": "77",
|
193 |
"notes": "--tensor-parallel-size 8",
|
194 |
},
|
195 |
{
|
196 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
197 |
+
"quantization": "none",
|
198 |
"container": "vLLM 0.6.4.post1",
|
199 |
"status": "OK",
|
200 |
"tokensPerSecond": "53",
|
|
|
202 |
},
|
203 |
{
|
204 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
205 |
+
"quantization": "none",
|
206 |
"container": "vLLM 0.6.4.post1",
|
207 |
"status": "OK",
|
208 |
"tokensPerSecond": "33",
|
results_arcee_supernova_medius.py
CHANGED
@@ -22,7 +22,7 @@ results_arcee_supernova_medius = {
|
|
22 |
},
|
23 |
{
|
24 |
"instanceType": "g5.12xlarge",
|
25 |
-
"quantization": "
|
26 |
"container": "LMI 0.30+vLLM 0.6.2",
|
27 |
"status": "OK",
|
28 |
"tokensPerSecond": "45",
|
@@ -30,7 +30,7 @@ results_arcee_supernova_medius = {
|
|
30 |
},
|
31 |
{
|
32 |
"instanceType": "g6.12xlarge",
|
33 |
-
"quantization": "
|
34 |
"container": "LMI 0.30+vLLM 0.6.2",
|
35 |
"status": "OK",
|
36 |
"tokensPerSecond": "29",
|
@@ -38,7 +38,7 @@ results_arcee_supernova_medius = {
|
|
38 |
},
|
39 |
{
|
40 |
"instanceType": "g6e.12xlarge",
|
41 |
-
"quantization": "
|
42 |
"container": "vLLM0.6.4.post1",
|
43 |
"status": "OK",
|
44 |
"tokensPerSecond": "70",
|
@@ -46,7 +46,7 @@ results_arcee_supernova_medius = {
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "g6e.12xlarge (2 GPUs)",
|
49 |
-
"quantization": "
|
50 |
"container": "vLLM 0.6.4.post1",
|
51 |
"status": "OK",
|
52 |
"tokensPerSecond": "43",
|
@@ -54,7 +54,7 @@ results_arcee_supernova_medius = {
|
|
54 |
},
|
55 |
{
|
56 |
"instanceType": "p4d.24xlarge",
|
57 |
-
"quantization": "
|
58 |
"container": "LMI 0.30+vLLM 0.6.2",
|
59 |
"status": "OK",
|
60 |
"tokensPerSecond": "108",
|
@@ -62,7 +62,7 @@ results_arcee_supernova_medius = {
|
|
62 |
},
|
63 |
{
|
64 |
"instanceType": "p5.48xlarge",
|
65 |
-
"quantization": "
|
66 |
"container": "vLLM 0.6.4.post1",
|
67 |
"status": "OK",
|
68 |
"tokensPerSecond": "162",
|
@@ -70,7 +70,7 @@ results_arcee_supernova_medius = {
|
|
70 |
},
|
71 |
{
|
72 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
73 |
-
"quantization": "
|
74 |
"container": "vLLM 0.6.4.post1",
|
75 |
"status": "OK",
|
76 |
"tokensPerSecond": "138",
|
@@ -78,7 +78,7 @@ results_arcee_supernova_medius = {
|
|
78 |
},
|
79 |
{
|
80 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
81 |
-
"quantization": "
|
82 |
"container": "vLLM 0.6.4.post1",
|
83 |
"status": "OK",
|
84 |
"tokensPerSecond": "102",
|
@@ -86,7 +86,7 @@ results_arcee_supernova_medius = {
|
|
86 |
},
|
87 |
{
|
88 |
"instanceType": "p5.48xlarge (1 GPU)",
|
89 |
-
"quantization": "
|
90 |
"container": "vLLM 0.6.4.post1",
|
91 |
"status": "OK",
|
92 |
"tokensPerSecond": "73",
|
|
|
22 |
},
|
23 |
{
|
24 |
"instanceType": "g5.12xlarge",
|
25 |
+
"quantization": "none",
|
26 |
"container": "LMI 0.30+vLLM 0.6.2",
|
27 |
"status": "OK",
|
28 |
"tokensPerSecond": "45",
|
|
|
30 |
},
|
31 |
{
|
32 |
"instanceType": "g6.12xlarge",
|
33 |
+
"quantization": "none",
|
34 |
"container": "LMI 0.30+vLLM 0.6.2",
|
35 |
"status": "OK",
|
36 |
"tokensPerSecond": "29",
|
|
|
38 |
},
|
39 |
{
|
40 |
"instanceType": "g6e.12xlarge",
|
41 |
+
"quantization": "none",
|
42 |
"container": "vLLM0.6.4.post1",
|
43 |
"status": "OK",
|
44 |
"tokensPerSecond": "70",
|
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "g6e.12xlarge (2 GPUs)",
|
49 |
+
"quantization": "none",
|
50 |
"container": "vLLM 0.6.4.post1",
|
51 |
"status": "OK",
|
52 |
"tokensPerSecond": "43",
|
|
|
54 |
},
|
55 |
{
|
56 |
"instanceType": "p4d.24xlarge",
|
57 |
+
"quantization": "none",
|
58 |
"container": "LMI 0.30+vLLM 0.6.2",
|
59 |
"status": "OK",
|
60 |
"tokensPerSecond": "108",
|
|
|
62 |
},
|
63 |
{
|
64 |
"instanceType": "p5.48xlarge",
|
65 |
+
"quantization": "none",
|
66 |
"container": "vLLM 0.6.4.post1",
|
67 |
"status": "OK",
|
68 |
"tokensPerSecond": "162",
|
|
|
70 |
},
|
71 |
{
|
72 |
"instanceType": "p5.48xlarge (4 GPUs)",
|
73 |
+
"quantization": "none",
|
74 |
"container": "vLLM 0.6.4.post1",
|
75 |
"status": "OK",
|
76 |
"tokensPerSecond": "138",
|
|
|
78 |
},
|
79 |
{
|
80 |
"instanceType": "p5.48xlarge (2 GPUs)",
|
81 |
+
"quantization": "none",
|
82 |
"container": "vLLM 0.6.4.post1",
|
83 |
"status": "OK",
|
84 |
"tokensPerSecond": "102",
|
|
|
86 |
},
|
87 |
{
|
88 |
"instanceType": "p5.48xlarge (1 GPU)",
|
89 |
+
"quantization": "none",
|
90 |
"container": "vLLM 0.6.4.post1",
|
91 |
"status": "OK",
|
92 |
"tokensPerSecond": "73",
|