Spaces:
Running
Running
Julien Simon
committed on
Commit
•
6f23d6c
1
Parent(s):
f7eda6a
Add Llama-Spark on g6e.2xlarge with SGLang
Browse files- results.py +21 -10
results.py
CHANGED
@@ -239,7 +239,7 @@ results = {
|
|
239 |
"modelType": "Llama 3.1 8B",
|
240 |
"configurations": [
|
241 |
{
|
242 |
-
"region": "
|
243 |
"instanceType": "g5.2xlarge",
|
244 |
"cloud": "AWS",
|
245 |
"gpu": "1xNVIDIA A10G",
|
@@ -251,7 +251,7 @@ results = {
|
|
251 |
"notes": "4K/8K fails",
|
252 |
},
|
253 |
{
|
254 |
-
"region": "
|
255 |
"instanceType": "g5.12xlarge",
|
256 |
"cloud": "AWS",
|
257 |
"gpu": "4xNVIDIA A10G",
|
@@ -263,7 +263,7 @@ results = {
|
|
263 |
"notes": '"MAX_INPUT_TOKENS": "16384", "MAX_TOTAL_TOKENS": "32768",',
|
264 |
},
|
265 |
{
|
266 |
-
"region": "
|
267 |
"instanceType": "g5.48xlarge",
|
268 |
"cloud": "AWS",
|
269 |
"gpu": "8xNVIDIA A10G",
|
@@ -275,7 +275,7 @@ results = {
|
|
275 |
"notes": '"MAX_INPUT_TOKENS": "20480", "MAX_TOTAL_TOKENS": "40960"\n\n32K/64K fails',
|
276 |
},
|
277 |
{
|
278 |
-
"region": "
|
279 |
"instanceType": "g6.2xlarge",
|
280 |
"cloud": "AWS",
|
281 |
"gpu": "1xNVIDIA L4",
|
@@ -291,7 +291,7 @@ results = {
|
|
291 |
],
|
292 |
},
|
293 |
{
|
294 |
-
"region": "
|
295 |
"instanceType": "g6.12xlarge",
|
296 |
"cloud": "AWS",
|
297 |
"gpu": "4xNVIDIA L4",
|
@@ -303,7 +303,7 @@ results = {
|
|
303 |
"notes": "same as g5?",
|
304 |
},
|
305 |
{
|
306 |
-
"region": "
|
307 |
"instanceType": "g6.48xlarge",
|
308 |
"cloud": "AWS",
|
309 |
"gpu": "8xNVIDIA L4",
|
@@ -315,7 +315,7 @@ results = {
|
|
315 |
"notes": "same as g5?",
|
316 |
},
|
317 |
{
|
318 |
-
"region": "
|
319 |
"instanceType": "g6e.2xlarge",
|
320 |
"cloud": "AWS",
|
321 |
"gpu": "1xNVIDIA L40S",
|
@@ -323,10 +323,21 @@ results = {
|
|
323 |
"quantization": "none",
|
324 |
"tgi": "TGI 2.2.0",
|
325 |
"status": "OK",
|
326 |
-
"tokensPerSecond": "42",
|
327 |
},
|
328 |
{
|
329 |
-
"region": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
"instanceType": "p4d.24xlarge",
|
331 |
"cloud": "AWS",
|
332 |
"gpu": "4xNVIDIA A100",
|
@@ -338,7 +349,7 @@ results = {
|
|
338 |
"notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
|
339 |
},
|
340 |
{
|
341 |
-
"region": "
|
342 |
"instanceType": "inf2.*",
|
343 |
"cloud": "AWS",
|
344 |
"gpu": "-",
|
|
|
239 |
"modelType": "Llama 3.1 8B",
|
240 |
"configurations": [
|
241 |
{
|
242 |
+
"region": "AWS",
|
243 |
"instanceType": "g5.2xlarge",
|
244 |
"cloud": "AWS",
|
245 |
"gpu": "1xNVIDIA A10G",
|
|
|
251 |
"notes": "4K/8K fails",
|
252 |
},
|
253 |
{
|
254 |
+
"region": "AWS",
|
255 |
"instanceType": "g5.12xlarge",
|
256 |
"cloud": "AWS",
|
257 |
"gpu": "4xNVIDIA A10G",
|
|
|
263 |
"notes": '"MAX_INPUT_TOKENS": "16384", "MAX_TOTAL_TOKENS": "32768",',
|
264 |
},
|
265 |
{
|
266 |
+
"region": "AWS",
|
267 |
"instanceType": "g5.48xlarge",
|
268 |
"cloud": "AWS",
|
269 |
"gpu": "8xNVIDIA A10G",
|
|
|
275 |
"notes": '"MAX_INPUT_TOKENS": "20480", "MAX_TOTAL_TOKENS": "40960"\n\n32K/64K fails',
|
276 |
},
|
277 |
{
|
278 |
+
"region": "AWS",
|
279 |
"instanceType": "g6.2xlarge",
|
280 |
"cloud": "AWS",
|
281 |
"gpu": "1xNVIDIA L4",
|
|
|
291 |
],
|
292 |
},
|
293 |
{
|
294 |
+
"region": "AWS",
|
295 |
"instanceType": "g6.12xlarge",
|
296 |
"cloud": "AWS",
|
297 |
"gpu": "4xNVIDIA L4",
|
|
|
303 |
"notes": "same as g5?",
|
304 |
},
|
305 |
{
|
306 |
+
"region": "AWS",
|
307 |
"instanceType": "g6.48xlarge",
|
308 |
"cloud": "AWS",
|
309 |
"gpu": "8xNVIDIA L4",
|
|
|
315 |
"notes": "same as g5?",
|
316 |
},
|
317 |
{
|
318 |
+
"region": "AWS",
|
319 |
"instanceType": "g6e.2xlarge",
|
320 |
"cloud": "AWS",
|
321 |
"gpu": "1xNVIDIA L40S",
|
|
|
323 |
"quantization": "none",
|
324 |
"tgi": "TGI 2.2.0",
|
325 |
"status": "OK",
|
326 |
+
"tokensPerSecond": "42.1",
|
327 |
},
|
328 |
{
|
329 |
+
"region": "AWS",
|
330 |
+
"instanceType": "g6e.2xlarge",
|
331 |
+
"cloud": "AWS",
|
332 |
+
"gpu": "1xNVIDIA L40S",
|
333 |
+
"gpuRAM": "48 GB",
|
334 |
+
"quantization": "none",
|
335 |
+
"tgi": "SGLang 0.2.13",
|
336 |
+
"status": "OK",
|
337 |
+
"tokensPerSecond": "45",
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"region": "AWS",
|
341 |
"instanceType": "p4d.24xlarge",
|
342 |
"cloud": "AWS",
|
343 |
"gpu": "4xNVIDIA A100",
|
|
|
349 |
"notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
|
350 |
},
|
351 |
{
|
352 |
+
"region": "AWS",
|
353 |
"instanceType": "inf2.*",
|
354 |
"cloud": "AWS",
|
355 |
"gpu": "-",
|