Spaces:
Running
Running
Julien Simon
committed on
Commit
•
f7eda6a
1
Parent(s):
efaad9e
Add Scribe on g6e.2xlarge with SGLang
Browse files- results.py +17 -7
results.py
CHANGED
@@ -528,7 +528,7 @@ results = {
|
|
528 |
"modelType": "InternLM2.5 8B",
|
529 |
"configurations": [
|
530 |
{
|
531 |
-
"cloud": "
|
532 |
"instanceType": "g5.2xlarge",
|
533 |
"gpu": "1xNVIDIA A10G",
|
534 |
"gpuRAM": "24 GB",
|
@@ -539,7 +539,7 @@ results = {
|
|
539 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
540 |
},
|
541 |
{
|
542 |
-
"cloud": "
|
543 |
"instanceType": "g5.12xlarge",
|
544 |
"gpu": "4xNVIDIA A10G",
|
545 |
"gpuRAM": "96 GB",
|
@@ -550,7 +550,7 @@ results = {
|
|
550 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",\nNot supported by AutoAWQ and AutoGPTQ',
|
551 |
},
|
552 |
{
|
553 |
-
"cloud": "
|
554 |
"instanceType": "g5.48xlarge",
|
555 |
"gpu": "8xNVIDIA A10G",
|
556 |
"gpuRAM": "192 GB",
|
@@ -561,7 +561,7 @@ results = {
|
|
561 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
562 |
},
|
563 |
{
|
564 |
-
"cloud": "
|
565 |
"instanceType": "g6.2xlarge",
|
566 |
"gpu": "1xNVIDIA L4",
|
567 |
"gpuRAM": "24 GB",
|
@@ -572,7 +572,7 @@ results = {
|
|
572 |
"notes": '"OPTION_MAX_MODEL_LEN": "4096"',
|
573 |
},
|
574 |
{
|
575 |
-
"cloud": "
|
576 |
"instanceType": "g6.12xlarge",
|
577 |
"gpu": "4xNVIDIA L4",
|
578 |
"gpuRAM": "96 GB",
|
@@ -583,7 +583,7 @@ results = {
|
|
583 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
584 |
},
|
585 |
{
|
586 |
-
"cloud": "
|
587 |
"instanceType": "g6.48xlarge",
|
588 |
"gpu": "8xNVIDIA L4",
|
589 |
"gpuRAM": "192 GB",
|
@@ -594,7 +594,17 @@ results = {
|
|
594 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
595 |
},
|
596 |
{
|
597 |
-
"cloud": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
598 |
"instanceType": "p4d.24xlarge",
|
599 |
"gpu": "4xNVIDIA A100",
|
600 |
"gpuRAM": "320 GB",
|
|
|
528 |
"modelType": "InternLM2.5 8B",
|
529 |
"configurations": [
|
530 |
{
|
531 |
+
"cloud": "AWS",
|
532 |
"instanceType": "g5.2xlarge",
|
533 |
"gpu": "1xNVIDIA A10G",
|
534 |
"gpuRAM": "24 GB",
|
|
|
539 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
540 |
},
|
541 |
{
|
542 |
+
"cloud": "AWS",
|
543 |
"instanceType": "g5.12xlarge",
|
544 |
"gpu": "4xNVIDIA A10G",
|
545 |
"gpuRAM": "96 GB",
|
|
|
550 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",\nNot supported by AutoAWQ and AutoGPTQ',
|
551 |
},
|
552 |
{
|
553 |
+
"cloud": "AWS",
|
554 |
"instanceType": "g5.48xlarge",
|
555 |
"gpu": "8xNVIDIA A10G",
|
556 |
"gpuRAM": "192 GB",
|
|
|
561 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
562 |
},
|
563 |
{
|
564 |
+
"cloud": "AWS",
|
565 |
"instanceType": "g6.2xlarge",
|
566 |
"gpu": "1xNVIDIA L4",
|
567 |
"gpuRAM": "24 GB",
|
|
|
572 |
"notes": '"OPTION_MAX_MODEL_LEN": "4096"',
|
573 |
},
|
574 |
{
|
575 |
+
"cloud": "AWS",
|
576 |
"instanceType": "g6.12xlarge",
|
577 |
"gpu": "4xNVIDIA L4",
|
578 |
"gpuRAM": "96 GB",
|
|
|
583 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
584 |
},
|
585 |
{
|
586 |
+
"cloud": "AWS",
|
587 |
"instanceType": "g6.48xlarge",
|
588 |
"gpu": "8xNVIDIA L4",
|
589 |
"gpuRAM": "192 GB",
|
|
|
594 |
"notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
|
595 |
},
|
596 |
{
|
597 |
+
"cloud": "AWS",
|
598 |
+
"instanceType": "g6e.2xlarge",
|
599 |
+
"gpu": "1xNVIDIA L40S",
|
600 |
+
"gpuRAM": "48 GB",
|
601 |
+
"quantization": "none",
|
602 |
+
"tgi": "SGLang 0.2.13",
|
603 |
+
"status": "OK",
|
604 |
+
"tokensPerSecond": 46,
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"cloud": "AWS",
|
608 |
"instanceType": "p4d.24xlarge",
|
609 |
"gpu": "4xNVIDIA A100",
|
610 |
"gpuRAM": "320 GB",
|