Julien Simon committed on
Commit f7eda6a
1 parent: efaad9e

Add Scribe on g6e.2xlarge with SGLang

Files changed (1): results.py (+17, -7)
results.py CHANGED
@@ -528,7 +528,7 @@ results = {
         "modelType": "InternLM2.5 8B",
         "configurations": [
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g5.2xlarge",
                 "gpu": "1xNVIDIA A10G",
                 "gpuRAM": "24 GB",
@@ -539,7 +539,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g5.12xlarge",
                 "gpu": "4xNVIDIA A10G",
                 "gpuRAM": "96 GB",
@@ -550,7 +550,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",\nNot supported by AutoAWQ and AutoGPTQ',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g5.48xlarge",
                 "gpu": "8xNVIDIA A10G",
                 "gpuRAM": "192 GB",
@@ -561,7 +561,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g6.2xlarge",
                 "gpu": "1xNVIDIA L4",
                 "gpuRAM": "24 GB",
@@ -572,7 +572,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "4096"',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g6.12xlarge",
                 "gpu": "4xNVIDIA L4",
                 "gpuRAM": "96 GB",
@@ -583,7 +583,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g6.48xlarge",
                 "gpu": "8xNVIDIA L4",
                 "gpuRAM": "192 GB",
@@ -594,7 +594,17 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
+                "instanceType": "g6e.2xlarge",
+                "gpu": "1xNVIDIA L40S",
+                "gpuRAM": "48 GB",
+                "quantization": "none",
+                "tgi": "SGLang 0.2.13",
+                "status": "OK",
+                "tokensPerSecond": 46,
+            },
+            {
+                "cloud": "AWS",
                 "instanceType": "p4d.24xlarge",
                 "gpu": "4xNVIDIA A100",
                 "gpuRAM": "320 GB",