Spaces:

arcee-ai
/

Benchmarks

Running

App Files Community

Julien Simon committed 29 days ago

Commit

316f1a9

•

1 Parent(s): a49294b

Add H100 for Nova and SuperNova

Browse files

Files changed (1) hide show

results_arcee_supernova.py +55 -53

results_arcee_supernova.py CHANGED Viewed

@@ -62,59 +62,11 @@ results_arcee_supernova = {
         },
         {
             "instanceType": "p5.48xlarge",
-            "configurations": [
-                {
-                    "quantization": "awq",
-                    "container": "TGI 2.2.0",
-                    "status": "OK",
-                    "tokensPerSecond": "73",
-                    "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
-                },
-                {
-                    "quantization": "none",
-                    "container": "TGI 2.2.0",
-                    "status": "OK",
-                    "tokensPerSecond": "58",
-                    "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
-                },
-                {
-                    "quantization": "none",
-                    "container": "LMI 0.29+vLLM 0.5.5",
-                    "status": "OK",
-                    "tokensPerSecond": "70",
-                    "notes": "OPTION_MAX_MODEL_LEN 128k",
-                },
-                {
-                    "quantization": "none",
-                    "container": "LMI 0.29+vLLM 0.5.5",
-                    "status": "OK",
-                    "tokensPerSecond": "70",
-                    "notes": "OPTION_ENFORCE_EAGER=True",
-                },
-                {
-                    "quantization": "None",
-                    "container": "vLLM 0.6.4.post1",
-                    "status": "OK",
-                    "tokensPerSecond": "77",
-                    "notes": "--tensor-parallel-size 8",
-                },
-                {
-                    "quantization": "None",
-                    "container": "vLLM 0.6.4.post1",
-                    "status": "OK",
-                    "tokensPerSecond": "53",
-                    "notes": "--tensor-parallel-size 4",
-                    "gpuCount": "4",
-                },
-                {
-                    "quantization": "None",
-                    "container": "vLLM 0.6.4.post1",
-                    "status": "OK",
-                    "tokensPerSecond": "33",
-                    "notes": "--tensor-parallel-size 2 --gpu_memory-utilization 0.95",
-                    "gpuCount": "2",
-                },
-            ],
         },
         {
             "instanceType": "inf2.24xlarge",
@@ -206,5 +158,55 @@ results_arcee_supernova = {
                 },
             ],
         },
     ],
 }

         },
         {
             "instanceType": "p5.48xlarge",
+            "quantization": "awq",
+            "container": "TGI 2.2.0",
+            "status": "OK",
+            "tokensPerSecond": "73",
+            "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
         },
         {
             "instanceType": "inf2.24xlarge",
                 },
             ],
         },
+        {
+            "instanceType": "p5.48xlarge",
+            "configurations": [
+                {
+                    "quantization": "none",
+                    "container": "TGI 2.2.0",
+                    "status": "OK",
+                    "tokensPerSecond": "58",
+                    "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
+                },
+                {
+                    "quantization": "none",
+                    "container": "LMI 0.29+vLLM 0.5.5",
+                    "status": "OK",
+                    "tokensPerSecond": "70",
+                    "notes": "OPTION_MAX_MODEL_LEN 128k",
+                },
+                {
+                    "quantization": "none",
+                    "container": "LMI 0.29+vLLM 0.5.5",
+                    "status": "OK",
+                    "tokensPerSecond": "70",
+                    "notes": "OPTION_ENFORCE_EAGER=True",
+                },
+            ],
+        },
+        {
+            "instanceType": "p5.48xlarge",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "N/A",
+            "tokensPerSecond": "77",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "53",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "xxx",
+            "notes": "--tensor-parallel-size 2",
+        },
     ],
 }