Julien Simon committed
Commit 6f23d6c
Parent(s): f7eda6a

Add Llama-Spark on g6e.2xlarge with SGLang

Files changed (1): results.py (+21, -10)

results.py CHANGED
@@ -239,7 +239,7 @@ results = {
         "modelType": "Llama 3.1 8B",
         "configurations": [
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g5.2xlarge",
                 "cloud": "AWS",
                 "gpu": "1xNVIDIA A10G",
@@ -251,7 +251,7 @@ results = {
                 "notes": "4K/8K fails",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g5.12xlarge",
                 "cloud": "AWS",
                 "gpu": "4xNVIDIA A10G",
@@ -263,7 +263,7 @@ results = {
                 "notes": '"MAX_INPUT_TOKENS": "16384", "MAX_TOTAL_TOKENS": "32768",',
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g5.48xlarge",
                 "cloud": "AWS",
                 "gpu": "8xNVIDIA A10G",
@@ -275,7 +275,7 @@ results = {
                 "notes": '"MAX_INPUT_TOKENS": "20480", "MAX_TOTAL_TOKENS": "40960"\n\n32K/64K fails',
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6.2xlarge",
                 "cloud": "AWS",
                 "gpu": "1xNVIDIA L4",
@@ -291,7 +291,7 @@ results = {
                 ],
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6.12xlarge",
                 "cloud": "AWS",
                 "gpu": "4xNVIDIA L4",
@@ -303,7 +303,7 @@ results = {
                 "notes": "same as g5?",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6.48xlarge",
                 "cloud": "AWS",
                 "gpu": "8xNVIDIA L4",
@@ -315,7 +315,7 @@ results = {
                 "notes": "same as g5?",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6e.2xlarge",
                 "cloud": "AWS",
                 "gpu": "1xNVIDIA L40S",
@@ -323,10 +323,21 @@ results = {
                 "quantization": "none",
                 "tgi": "TGI 2.2.0",
                 "status": "OK",
-                "tokensPerSecond": "42",
+                "tokensPerSecond": "42.1",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
+                "instanceType": "g6e.2xlarge",
+                "cloud": "AWS",
+                "gpu": "1xNVIDIA L40S",
+                "gpuRAM": "48 GB",
+                "quantization": "none",
+                "tgi": "SGLang 0.2.13",
+                "status": "OK",
+                "tokensPerSecond": "45",
+            },
+            {
+                "region": "AWS",
                 "instanceType": "p4d.24xlarge",
                 "cloud": "AWS",
                 "gpu": "4xNVIDIA A100",
@@ -338,7 +349,7 @@ results = {
                 "notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "inf2.*",
                 "cloud": "AWS",
                 "gpu": "-",