Julien Simon committed on
Commit 86d92de · 1 Parent(s): 10318fd

Remove FCV1

Files changed (3)
  1. results.py +0 -2
  2. results_arcee_fcv1.py +0 -88
  3. results_virtuoso_medium.py +82 -1
results.py CHANGED
@@ -1,7 +1,6 @@
  """Module containing model configuration results for various AI models and hardware setups."""

  from results_arcee_agent import results_arcee_agent
- from results_arcee_fcv1 import results_arcee_fcv1
  from results_arcee_lite import results_arcee_lite
  from results_arcee_meraj import results_arcee_meraj
  from results_arcee_nova import results_arcee_nova
@@ -281,7 +280,6 @@ results = {
  results_arcee_scribe,
  results_llama_supernova_lite,
  results_arcee_supernova_medius,
- results_arcee_fcv1,
  results_virtuoso_small,
  results_virtuoso_medium,
  results_virtuoso_large,
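
Each results_*.py module exports a single dict with "name", "modelType", and "configurations" keys, and results.py only aggregates those dicts, so dropping a model is exactly the two-line removal shown above. A minimal consumer-side sketch, assuming the aggregate is an iterable of model dicts (the get_model helper and the one-element list are hypothetical, not part of the repository):

# Minimal sketch, assuming results.py exposes an iterable of per-model
# dicts shaped like those in this commit; get_model is hypothetical.
from results_virtuoso_medium import results_virtuoso_medium

results = [results_virtuoso_medium]  # stand-in for the full aggregate

def get_model(name):
    """Return the result dict for a model by name, or None if absent."""
    return next((m for m in results if m["name"] == name), None)

print(get_model("Virtuoso-Medium")["modelType"])  # -> Qwen2.5 32B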
results_arcee_fcv1.py DELETED
@@ -1,88 +0,0 @@
- """Module containing performance results for the Arcee-FCV1 model."""
-
- results_arcee_fcv1 = {
- "name": "Arcee-FCV1",
- "modelType": "Qwen2.5 32B",
- "configurations": [
- {
- "instanceType": "r8g.4xlarge",
- "quantization": "Q4_0_4_8",
- "container": "llama.cpp 11/27/24",
- "status": "OK",
- "tokensPerSecond": "10.5",
- "notes": "-fa",
- },
- {
- "instanceType": "g5.12xlarge",
- "quantization": "none",
- "container": "LMI 0.30+vLLM 0.6.2",
- "status": "OK",
- "tokensPerSecond": "23",
- "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
- },
- {
- "instanceType": "g6.12xlarge",
- "quantization": "none",
- "container": "LMI 0.30+vLLM 0.6.2",
- "status": "OK",
- "tokensPerSecond": "14",
- "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
- },
- {
- "instanceType": "g6e.12xlarge",
- "quantization": "none",
- "container": "vLLM0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "36",
- "notes": "--tensor-parallel-size 4 --max-model-len 16384",
- },
- {
- "instanceType": "g6e.12xlarge (2 GPUs)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "21",
- "notes": "--tensor-parallel-size 2 --max-model-len 16384",
- },
- {
- "instanceType": "p4d.24xlarge",
- "quantization": "none",
- "container": "LMI 0.30+vLLM 0.6.2",
- "status": "OK",
- "tokensPerSecond": "72.5",
- "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
- },
- {
- "instanceType": "p5.48xlarge",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "117",
- "notes": "--tensor-parallel-size 8",
- },
- {
- "instanceType": "p5.48xlarge (4 GPUs)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "88",
- "notes": "--tensor-parallel-size 4",
- },
- {
- "instanceType": "p5.48xlarge (2 GPUs)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "58",
- "notes": "--tensor-parallel-size 2",
- },
- {
- "instanceType": "p5.48xlarge (1 GPU)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "38",
- "notes": "--tensor-parallel-size 1",
- },
- ],
- }
results_virtuoso_medium.py CHANGED
@@ -3,5 +3,86 @@
  results_virtuoso_medium = {
  "name": "Virtuoso-Medium",
  "modelType": "Qwen2.5 32B",
- "configurations": [],
+ "configurations": [
+ {
+ "instanceType": "r8g.4xlarge",
+ "quantization": "Q4_0_4_8",
+ "container": "llama.cpp 11/27/24",
+ "status": "OK",
+ "tokensPerSecond": "10.5",
+ "notes": "-fa",
+ },
+ {
+ "instanceType": "g5.12xlarge",
+ "quantization": "none",
+ "container": "LMI 0.30+vLLM 0.6.2",
+ "status": "OK",
+ "tokensPerSecond": "23",
+ "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
+ },
+ {
+ "instanceType": "g6.12xlarge",
+ "quantization": "none",
+ "container": "LMI 0.30+vLLM 0.6.2",
+ "status": "OK",
+ "tokensPerSecond": "14",
+ "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
+ },
+ {
+ "instanceType": "g6e.12xlarge",
+ "quantization": "none",
+ "container": "vLLM0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "36",
+ "notes": "--tensor-parallel-size 4 --max-model-len 16384",
+ },
+ {
+ "instanceType": "g6e.12xlarge (2 GPUs)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "21",
+ "notes": "--tensor-parallel-size 2 --max-model-len 16384",
+ },
+ {
+ "instanceType": "p4d.24xlarge",
+ "quantization": "none",
+ "container": "LMI 0.30+vLLM 0.6.2",
+ "status": "OK",
+ "tokensPerSecond": "72.5",
+ "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
+ },
+ {
+ "instanceType": "p5.48xlarge",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "117",
+ "notes": "--tensor-parallel-size 8",
+ },
+ {
+ "instanceType": "p5.48xlarge (4 GPUs)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "88",
+ "notes": "--tensor-parallel-size 4",
+ },
+ {
+ "instanceType": "p5.48xlarge (2 GPUs)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "58",
+ "notes": "--tensor-parallel-size 2",
+ },
+ {
+ "instanceType": "p5.48xlarge (1 GPU)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "38",
+ "notes": "--tensor-parallel-size 1",
+ },
+ ],
  }
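
Note that every configuration stores tokensPerSecond as a string, so downstream code must convert it before comparing. A minimal post-processing sketch, assuming numeric strings throughout (the fastest_configuration helper is hypothetical, not part of the repository):

# Minimal sketch, not part of the repository: rank the configurations of
# a model dict shaped like results_virtuoso_medium. Assumes every
# "tokensPerSecond" value is a numeric string, as in this commit.
def fastest_configuration(model):
    """Return the configuration with the highest tokens-per-second."""
    return max(model["configurations"], key=lambda c: float(c["tokensPerSecond"]))

# For the data added above, this returns the 8-GPU p5.48xlarge entry (117 tok/s).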