Julien Simon committed · commit 86d92de · 1 parent: 10318fd

Remove FCV1

Files changed:
- results.py: +0 -2
- results_arcee_fcv1.py: +0 -88
- results_virtuoso_medium.py: +82 -1
results.py
CHANGED
@@ -1,7 +1,6 @@
 """Module containing model configuration results for various AI models and hardware setups."""
 
 from results_arcee_agent import results_arcee_agent
-from results_arcee_fcv1 import results_arcee_fcv1
 from results_arcee_lite import results_arcee_lite
 from results_arcee_meraj import results_arcee_meraj
 from results_arcee_nova import results_arcee_nova
@@ -281,7 +280,6 @@ results = {
     results_arcee_scribe,
     results_llama_supernova_lite,
     results_arcee_supernova_medius,
-    results_arcee_fcv1,
     results_virtuoso_small,
     results_virtuoso_medium,
     results_virtuoso_large,
results_arcee_fcv1.py
DELETED
@@ -1,88 +0,0 @@
-"""Module containing performance results for the Arcee-FCV1 model."""
-
-results_arcee_fcv1 = {
-    "name": "Arcee-FCV1",
-    "modelType": "Qwen2.5 32B",
-    "configurations": [
-        {
-            "instanceType": "r8g.4xlarge",
-            "quantization": "Q4_0_4_8",
-            "container": "llama.cpp 11/27/24",
-            "status": "OK",
-            "tokensPerSecond": "10.5",
-            "notes": "-fa",
-        },
-        {
-            "instanceType": "g5.12xlarge",
-            "quantization": "none",
-            "container": "LMI 0.30+vLLM 0.6.2",
-            "status": "OK",
-            "tokensPerSecond": "23",
-            "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
-        },
-        {
-            "instanceType": "g6.12xlarge",
-            "quantization": "none",
-            "container": "LMI 0.30+vLLM 0.6.2",
-            "status": "OK",
-            "tokensPerSecond": "14",
-            "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
-        },
-        {
-            "instanceType": "g6e.12xlarge",
-            "quantization": "none",
-            "container": "vLLM0.6.4.post1",
-            "status": "OK",
-            "tokensPerSecond": "36",
-            "notes": "--tensor-parallel-size 4 --max-model-len 16384",
-        },
-        {
-            "instanceType": "g6e.12xlarge (2 GPUs)",
-            "quantization": "none",
-            "container": "vLLM 0.6.4.post1",
-            "status": "OK",
-            "tokensPerSecond": "21",
-            "notes": "--tensor-parallel-size 2 --max-model-len 16384",
-        },
-        {
-            "instanceType": "p4d.24xlarge",
-            "quantization": "none",
-            "container": "LMI 0.30+vLLM 0.6.2",
-            "status": "OK",
-            "tokensPerSecond": "72.5",
-            "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
-        },
-        {
-            "instanceType": "p5.48xlarge",
-            "quantization": "none",
-            "container": "vLLM 0.6.4.post1",
-            "status": "OK",
-            "tokensPerSecond": "117",
-            "notes": "--tensor-parallel-size 8",
-        },
-        {
-            "instanceType": "p5.48xlarge (4 GPUs)",
-            "quantization": "none",
-            "container": "vLLM 0.6.4.post1",
-            "status": "OK",
-            "tokensPerSecond": "88",
-            "notes": "--tensor-parallel-size 4",
-        },
-        {
-            "instanceType": "p5.48xlarge (2 GPUs)",
-            "quantization": "none",
-            "container": "vLLM 0.6.4.post1",
-            "status": "OK",
-            "tokensPerSecond": "58",
-            "notes": "--tensor-parallel-size 2",
-        },
-        {
-            "instanceType": "p5.48xlarge (1 GPU)",
-            "quantization": "none",
-            "container": "vLLM 0.6.4.post1",
-            "status": "OK",
-            "tokensPerSecond": "38",
-            "notes": "--tensor-parallel-size 1",
-        },
-    ],
-}
results_virtuoso_medium.py
CHANGED
@@ -3,5 +3,86 @@
 results_virtuoso_medium = {
     "name": "Virtuoso-Medium",
     "modelType": "Qwen2.5 32B",
-    "configurations": [
+    "configurations": [
+        {
+            "instanceType": "r8g.4xlarge",
+            "quantization": "Q4_0_4_8",
+            "container": "llama.cpp 11/27/24",
+            "status": "OK",
+            "tokensPerSecond": "10.5",
+            "notes": "-fa",
+        },
+        {
+            "instanceType": "g5.12xlarge",
+            "quantization": "none",
+            "container": "LMI 0.30+vLLM 0.6.2",
+            "status": "OK",
+            "tokensPerSecond": "23",
+            "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
+        },
+        {
+            "instanceType": "g6.12xlarge",
+            "quantization": "none",
+            "container": "LMI 0.30+vLLM 0.6.2",
+            "status": "OK",
+            "tokensPerSecond": "14",
+            "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
+        },
+        {
+            "instanceType": "g6e.12xlarge",
+            "quantization": "none",
+            "container": "vLLM0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "36",
+            "notes": "--tensor-parallel-size 4 --max-model-len 16384",
+        },
+        {
+            "instanceType": "g6e.12xlarge (2 GPUs)",
+            "quantization": "none",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "21",
+            "notes": "--tensor-parallel-size 2 --max-model-len 16384",
+        },
+        {
+            "instanceType": "p4d.24xlarge",
+            "quantization": "none",
+            "container": "LMI 0.30+vLLM 0.6.2",
+            "status": "OK",
+            "tokensPerSecond": "72.5",
+            "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
+        },
+        {
+            "instanceType": "p5.48xlarge",
+            "quantization": "none",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "117",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "none",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "88",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "none",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "58",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "none",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "38",
+            "notes": "--tensor-parallel-size 1",
+        },
+    ],
 }
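For reference, each per-model module in this repository exposes a plain dict with a "configurations" list, as shown in the diff above. The snippet below is a minimal, hypothetical sketch (not part of this commit) of how such a dict could be queried; the helper name tokens_per_second and the example dict are illustrative only.

# Hypothetical helper, not part of this repository: look up the reported
# "tokensPerSecond" value for a given instance type in a per-model results
# dict shaped like results_virtuoso_medium above.
from typing import Optional

def tokens_per_second(model_results: dict, instance_type: str) -> Optional[float]:
    """Return the reported tokens/second for instance_type, or None if absent."""
    for config in model_results.get("configurations", []):
        if config.get("instanceType") == instance_type:
            # Values are stored as strings in these dicts, e.g. "117".
            return float(config["tokensPerSecond"])
    return None

# Illustrative usage with a dict shaped like the ones in this commit.
example = {
    "name": "Virtuoso-Medium",
    "modelType": "Qwen2.5 32B",
    "configurations": [
        {"instanceType": "p5.48xlarge", "tokensPerSecond": "117"},
    ],
}
print(tokens_per_second(example, "p5.48xlarge"))  # 117.0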