Julien Simon committed
Commit 6f23d6c
Parent(s): f7eda6a

Add Llama-Spark on g6e.2xlarge with SGLang

Files changed (1): results.py (+21, -10)

results.py CHANGED
@@ -239,7 +239,7 @@ results = {
         "modelType": "Llama 3.1 8B",
         "configurations": [
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g5.2xlarge",
                 "cloud": "AWS",
                 "gpu": "1xNVIDIA A10G",
@@ -251,7 +251,7 @@ results = {
                 "notes": "4K/8K fails",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g5.12xlarge",
                 "cloud": "AWS",
                 "gpu": "4xNVIDIA A10G",
@@ -263,7 +263,7 @@ results = {
                 "notes": '"MAX_INPUT_TOKENS": "16384", "MAX_TOTAL_TOKENS": "32768",',
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g5.48xlarge",
                 "cloud": "AWS",
                 "gpu": "8xNVIDIA A10G",
@@ -275,7 +275,7 @@ results = {
                 "notes": '"MAX_INPUT_TOKENS": "20480", "MAX_TOTAL_TOKENS": "40960"\n\n32K/64K fails',
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6.2xlarge",
                 "cloud": "AWS",
                 "gpu": "1xNVIDIA L4",
@@ -291,7 +291,7 @@ results = {
                 ],
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6.12xlarge",
                 "cloud": "AWS",
                 "gpu": "4xNVIDIA L4",
@@ -303,7 +303,7 @@ results = {
                 "notes": "same as g5?",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6.48xlarge",
                 "cloud": "AWS",
                 "gpu": "8xNVIDIA L4",
@@ -315,7 +315,7 @@ results = {
                 "notes": "same as g5?",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "g6e.2xlarge",
                 "cloud": "AWS",
                 "gpu": "1xNVIDIA L40S",
@@ -323,10 +323,21 @@ results = {
                 "quantization": "none",
                 "tgi": "TGI 2.2.0",
                 "status": "OK",
-                "tokensPerSecond": "42",
+                "tokensPerSecond": "42.1",
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
+                "instanceType": "g6e.2xlarge",
+                "cloud": "AWS",
+                "gpu": "1xNVIDIA L40S",
+                "gpuRAM": "48 GB",
+                "quantization": "none",
+                "tgi": "SGLang 0.2.13",
+                "status": "OK",
+                "tokensPerSecond": "45",
+            },
+            {
+                "region": "AWS",
                 "instanceType": "p4d.24xlarge",
                 "cloud": "AWS",
                 "gpu": "4xNVIDIA A100",
@@ -338,7 +349,7 @@ results = {
                 "notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
             },
             {
-                "region": "us-west-2",
+                "region": "AWS",
                 "instanceType": "inf2.*",
                 "cloud": "AWS",
                 "gpu": "-",