Julien Simon committed on
Commit f7eda6a
1 parent: efaad9e

Add Scribe on g6e.2xlarge with SGLang

Files changed (1): results.py (+17, -7)
results.py CHANGED
@@ -528,7 +528,7 @@ results = {
         "modelType": "InternLM2.5 8B",
         "configurations": [
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g5.2xlarge",
                 "gpu": "1xNVIDIA A10G",
                 "gpuRAM": "24 GB",
@@ -539,7 +539,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g5.12xlarge",
                 "gpu": "4xNVIDIA A10G",
                 "gpuRAM": "96 GB",
@@ -550,7 +550,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",\nNot supported by AutoAWQ and AutoGPTQ',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g5.48xlarge",
                 "gpu": "8xNVIDIA A10G",
                 "gpuRAM": "192 GB",
@@ -561,7 +561,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g6.2xlarge",
                 "gpu": "1xNVIDIA L4",
                 "gpuRAM": "24 GB",
@@ -572,7 +572,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "4096"',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g6.12xlarge",
                 "gpu": "4xNVIDIA L4",
                 "gpuRAM": "96 GB",
@@ -583,7 +583,7 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
                 "instanceType": "g6.48xlarge",
                 "gpu": "8xNVIDIA L4",
                 "gpuRAM": "192 GB",
@@ -594,7 +594,17 @@ results = {
                 "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
             },
             {
-                "cloud": "us-west-2",
+                "cloud": "AWS",
+                "instanceType": "g6e.2xlarge",
+                "gpu": "1xNVIDIA L40S",
+                "gpuRAM": "48 GB",
+                "quantization": "none",
+                "tgi": "SGLang 0.2.13",
+                "status": "OK",
+                "tokensPerSecond": 46,
+            },
+            {
+                "cloud": "AWS",
                 "instanceType": "p4d.24xlarge",
                 "gpu": "4xNVIDIA A100",
                 "gpuRAM": "320 GB",