Spaces:
Running
Running
Julien Simon
committed on
Commit
•
316f1a9
1
Parent(s):
a49294b
Add H100 for Nova and SuperNova
Browse files
- results_arcee_supernova.py +55 -53
results_arcee_supernova.py
CHANGED
@@ -62,59 +62,11 @@ results_arcee_supernova = {
|
|
62 |
},
|
63 |
{
|
64 |
"instanceType": "p5.48xlarge",
|
65 |
-
"
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
"tokensPerSecond": "73",
|
71 |
-
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
|
72 |
-
},
|
73 |
-
{
|
74 |
-
"quantization": "none",
|
75 |
-
"container": "TGI 2.2.0",
|
76 |
-
"status": "OK",
|
77 |
-
"tokensPerSecond": "58",
|
78 |
-
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"quantization": "none",
|
82 |
-
"container": "LMI 0.29+vLLM 0.5.5",
|
83 |
-
"status": "OK",
|
84 |
-
"tokensPerSecond": "70",
|
85 |
-
"notes": "OPTION_MAX_MODEL_LEN 128k",
|
86 |
-
},
|
87 |
-
{
|
88 |
-
"quantization": "none",
|
89 |
-
"container": "LMI 0.29+vLLM 0.5.5",
|
90 |
-
"status": "OK",
|
91 |
-
"tokensPerSecond": "70",
|
92 |
-
"notes": "OPTION_ENFORCE_EAGER=True",
|
93 |
-
},
|
94 |
-
{
|
95 |
-
"quantization": "None",
|
96 |
-
"container": "vLLM 0.6.4.post1",
|
97 |
-
"status": "OK",
|
98 |
-
"tokensPerSecond": "77",
|
99 |
-
"notes": "--tensor-parallel-size 8",
|
100 |
-
},
|
101 |
-
{
|
102 |
-
"quantization": "None",
|
103 |
-
"container": "vLLM 0.6.4.post1",
|
104 |
-
"status": "OK",
|
105 |
-
"tokensPerSecond": "53",
|
106 |
-
"notes": "--tensor-parallel-size 4",
|
107 |
-
"gpuCount": "4",
|
108 |
-
},
|
109 |
-
{
|
110 |
-
"quantization": "None",
|
111 |
-
"container": "vLLM 0.6.4.post1",
|
112 |
-
"status": "OK",
|
113 |
-
"tokensPerSecond": "33",
|
114 |
-
"notes": "--tensor-parallel-size 2 --gpu_memory-utilization 0.95",
|
115 |
-
"gpuCount": "2",
|
116 |
-
},
|
117 |
-
],
|
118 |
},
|
119 |
{
|
120 |
"instanceType": "inf2.24xlarge",
|
@@ -206,5 +158,55 @@ results_arcee_supernova = {
|
|
206 |
},
|
207 |
],
|
208 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
],
|
210 |
}
|
|
|
62 |
},
|
63 |
{
|
64 |
"instanceType": "p5.48xlarge",
|
65 |
+
"quantization": "awq",
|
66 |
+
"container": "TGI 2.2.0",
|
67 |
+
"status": "OK",
|
68 |
+
"tokensPerSecond": "73",
|
69 |
+
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
},
|
71 |
{
|
72 |
"instanceType": "inf2.24xlarge",
|
|
|
158 |
},
|
159 |
],
|
160 |
},
|
161 |
+
{
|
162 |
+
"instanceType": "p5.48xlarge",
|
163 |
+
"configurations": [
|
164 |
+
{
|
165 |
+
"quantization": "none",
|
166 |
+
"container": "TGI 2.2.0",
|
167 |
+
"status": "OK",
|
168 |
+
"tokensPerSecond": "58",
|
169 |
+
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"quantization": "none",
|
173 |
+
"container": "LMI 0.29+vLLM 0.5.5",
|
174 |
+
"status": "OK",
|
175 |
+
"tokensPerSecond": "70",
|
176 |
+
"notes": "OPTION_MAX_MODEL_LEN 128k",
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"quantization": "none",
|
180 |
+
"container": "LMI 0.29+vLLM 0.5.5",
|
181 |
+
"status": "OK",
|
182 |
+
"tokensPerSecond": "70",
|
183 |
+
"notes": "OPTION_ENFORCE_EAGER=True",
|
184 |
+
},
|
185 |
+
],
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"instanceType": "p5.48xlarge",
|
189 |
+
"quantization": "None",
|
190 |
+
"container": "vLLM 0.6.4.post1",
|
191 |
+
"status": "N/A",
|
192 |
+
"tokensPerSecond": "77",
|
193 |
+
"notes": "--tensor-parallel-size 8",
|
194 |
+
},
|
195 |
+
{
|
196 |
+
"instanceType": "p5.48xlarge (4 GPUs)",
|
197 |
+
"quantization": "None",
|
198 |
+
"container": "vLLM 0.6.4.post1",
|
199 |
+
"status": "OK",
|
200 |
+
"tokensPerSecond": "53",
|
201 |
+
"notes": "--tensor-parallel-size 4",
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"instanceType": "p5.48xlarge (2 GPUs)",
|
205 |
+
"quantization": "None",
|
206 |
+
"container": "vLLM 0.6.4.post1",
|
207 |
+
"status": "OK",
|
208 |
+
"tokensPerSecond": "xxx",
|
209 |
+
"notes": "--tensor-parallel-size 2",
|
210 |
+
},
|
211 |
],
|
212 |
}
|