Spaces:
Running
Running
Julien Simon
committed on
Commit
•
7dff48b
1
Parent(s):
12fc436
Add cost-performance index (CPI)
Browse files
- results.py +20 -2
- results_arcee_agent.py +0 -7
- results_llama_spark.py +1 -1
results.py
CHANGED
@@ -16,78 +16,91 @@ instance_type_mappings = {
|
|
16 |
"gpu": "1xNVIDIA A10G",
|
17 |
"gpuRAM": "24 GB",
|
18 |
"url": "https://instances.vantage.sh/aws/ec2/g5.xlarge",
|
|
|
19 |
},
|
20 |
"g5.2xlarge": {
|
21 |
"cloud": "AWS",
|
22 |
"gpu": "1xNVIDIA A10G",
|
23 |
"gpuRAM": "24 GB",
|
24 |
"url": "https://instances.vantage.sh/aws/ec2/g5.2xlarge",
|
|
|
25 |
},
|
26 |
"g5.12xlarge": {
|
27 |
"cloud": "AWS",
|
28 |
"gpu": "4xNVIDIA A10G",
|
29 |
"gpuRAM": "96 GB",
|
30 |
"url": "https://instances.vantage.sh/aws/ec2/g5.12xlarge",
|
|
|
31 |
},
|
32 |
"g5.48xlarge": {
|
33 |
"cloud": "AWS",
|
34 |
"gpu": "8xNVIDIA A10G",
|
35 |
"gpuRAM": "192 GB",
|
36 |
"url": "https://instances.vantage.sh/aws/ec2/g5.48xlarge",
|
|
|
37 |
},
|
38 |
"g6.2xlarge": {
|
39 |
"cloud": "AWS",
|
40 |
"gpu": "1xNVIDIA L4",
|
41 |
"gpuRAM": "24 GB",
|
42 |
"url": "https://instances.vantage.sh/aws/ec2/g6.2xlarge",
|
|
|
43 |
},
|
44 |
"g6.12xlarge": {
|
45 |
"cloud": "AWS",
|
46 |
"gpu": "4xNVIDIA L4",
|
47 |
"gpuRAM": "96 GB",
|
48 |
"url": "https://instances.vantage.sh/aws/ec2/g6.12xlarge",
|
|
|
49 |
},
|
50 |
"g6.48xlarge": {
|
51 |
"cloud": "AWS",
|
52 |
"gpu": "8xNVIDIA L4",
|
53 |
"gpuRAM": "192 GB",
|
54 |
"url": "https://instances.vantage.sh/aws/ec2/g6.48xlarge",
|
|
|
55 |
},
|
56 |
"g6e.2xlarge": {
|
57 |
"cloud": "AWS",
|
58 |
"gpu": "1xNVIDIA L40S",
|
59 |
"gpuRAM": "48 GB",
|
60 |
"url": "https://instances.vantage.sh/aws/ec2/g6e.2xlarge",
|
|
|
61 |
},
|
62 |
"g6e.12xlarge": {
|
63 |
"cloud": "AWS",
|
64 |
"gpu": "4xNVIDIA L40S",
|
65 |
"gpuRAM": "192 GB",
|
66 |
"url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
|
|
|
67 |
},
|
68 |
"g4dn.12xlarge": {
|
69 |
"cloud": "AWS",
|
70 |
"gpu": "4xNVIDIA T4",
|
71 |
"gpuRAM": "64 GB",
|
72 |
"url": "https://instances.vantage.sh/aws/ec2/g4dn.12xlarge",
|
|
|
73 |
},
|
74 |
"p4d.24xlarge": {
|
75 |
"cloud": "AWS",
|
76 |
"gpu": "4xNVIDIA A100",
|
77 |
"gpuRAM": "320 GB",
|
78 |
"url": "https://instances.vantage.sh/aws/ec2/p4d.24xlarge",
|
|
|
79 |
},
|
80 |
"p4de.24xlarge": {
|
81 |
"cloud": "AWS",
|
82 |
"gpu": "8xNVIDIA A100",
|
83 |
"gpuRAM": "320 GB",
|
84 |
"url": "https://instances.vantage.sh/aws/ec2/p4de.24xlarge",
|
|
|
85 |
},
|
86 |
"p5.48xlarge": {
|
87 |
"cloud": "AWS",
|
88 |
"gpu": "8xNVIDIA H100",
|
89 |
"gpuRAM": "640GB",
|
90 |
"url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
|
|
|
91 |
},
|
92 |
"c6i.xlarge": {
|
93 |
"cloud": "AWS",
|
@@ -100,36 +113,42 @@ instance_type_mappings = {
|
|
100 |
"gpu": "-",
|
101 |
"gpuRAM": "-",
|
102 |
"url": "https://instances.vantage.sh/aws/ec2/c6i.2xlarge",
|
|
|
103 |
},
|
104 |
"c6i.4xlarge": {
|
105 |
"cloud": "AWS",
|
106 |
"gpu": "-",
|
107 |
"gpuRAM": "-",
|
108 |
"url": "https://instances.vantage.sh/aws/ec2/c6i.4xlarge",
|
|
|
109 |
},
|
110 |
"c7i.4xlarge": {
|
111 |
"cloud": "AWS",
|
112 |
"gpu": "-",
|
113 |
"gpuRAM": "-",
|
114 |
"url": "https://instances.vantage.sh/aws/ec2/c7i.4xlarge",
|
|
|
115 |
},
|
116 |
-
"inf2.
|
117 |
"cloud": "AWS",
|
118 |
"gpu": "2xNeuronCore v2",
|
119 |
"gpuRAM": "32 GB",
|
120 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.2xlarge",
|
|
|
121 |
},
|
122 |
"inf2.8xlarge": {
|
123 |
"cloud": "AWS",
|
124 |
"gpu": "1xNeuronCore v2",
|
125 |
"gpuRAM": "32 GB",
|
126 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.8xlarge",
|
|
|
127 |
},
|
128 |
"inf2.24xlarge": {
|
129 |
"cloud": "AWS",
|
130 |
"gpu": "12xNeuronCore v2",
|
131 |
"gpuRAM": "192 GB",
|
132 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.24xlarge",
|
|
|
133 |
},
|
134 |
"inf2.48xlarge": {
|
135 |
"cloud": "AWS",
|
@@ -137,7 +156,6 @@ instance_type_mappings = {
|
|
137 |
"gpuRAM": "384 GB",
|
138 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
|
139 |
},
|
140 |
-
"inf2.*": {"cloud": "AWS", "gpu": "-", "gpuRAM": "-", "url": ""},
|
141 |
}
|
142 |
|
143 |
results = {
|
|
|
16 |
"gpu": "1xNVIDIA A10G",
|
17 |
"gpuRAM": "24 GB",
|
18 |
"url": "https://instances.vantage.sh/aws/ec2/g5.xlarge",
|
19 |
+
"price": 1.006,
|
20 |
},
|
21 |
"g5.2xlarge": {
|
22 |
"cloud": "AWS",
|
23 |
"gpu": "1xNVIDIA A10G",
|
24 |
"gpuRAM": "24 GB",
|
25 |
"url": "https://instances.vantage.sh/aws/ec2/g5.2xlarge",
|
26 |
+
"price": 1.212,
|
27 |
},
|
28 |
"g5.12xlarge": {
|
29 |
"cloud": "AWS",
|
30 |
"gpu": "4xNVIDIA A10G",
|
31 |
"gpuRAM": "96 GB",
|
32 |
"url": "https://instances.vantage.sh/aws/ec2/g5.12xlarge",
|
33 |
+
"price": 5.672,
|
34 |
},
|
35 |
"g5.48xlarge": {
|
36 |
"cloud": "AWS",
|
37 |
"gpu": "8xNVIDIA A10G",
|
38 |
"gpuRAM": "192 GB",
|
39 |
"url": "https://instances.vantage.sh/aws/ec2/g5.48xlarge",
|
40 |
+
"price": 16.288,
|
41 |
},
|
42 |
"g6.2xlarge": {
|
43 |
"cloud": "AWS",
|
44 |
"gpu": "1xNVIDIA L4",
|
45 |
"gpuRAM": "24 GB",
|
46 |
"url": "https://instances.vantage.sh/aws/ec2/g6.2xlarge",
|
47 |
+
"price": 0.9776,
|
48 |
},
|
49 |
"g6.12xlarge": {
|
50 |
"cloud": "AWS",
|
51 |
"gpu": "4xNVIDIA L4",
|
52 |
"gpuRAM": "96 GB",
|
53 |
"url": "https://instances.vantage.sh/aws/ec2/g6.12xlarge",
|
54 |
+
"price": 4.602,
|
55 |
},
|
56 |
"g6.48xlarge": {
|
57 |
"cloud": "AWS",
|
58 |
"gpu": "8xNVIDIA L4",
|
59 |
"gpuRAM": "192 GB",
|
60 |
"url": "https://instances.vantage.sh/aws/ec2/g6.48xlarge",
|
61 |
+
"price": 13.35,
|
62 |
},
|
63 |
"g6e.2xlarge": {
|
64 |
"cloud": "AWS",
|
65 |
"gpu": "1xNVIDIA L40S",
|
66 |
"gpuRAM": "48 GB",
|
67 |
"url": "https://instances.vantage.sh/aws/ec2/g6e.2xlarge",
|
68 |
+
"price": 2.242,
|
69 |
},
|
70 |
"g6e.12xlarge": {
|
71 |
"cloud": "AWS",
|
72 |
"gpu": "4xNVIDIA L40S",
|
73 |
"gpuRAM": "192 GB",
|
74 |
"url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
|
75 |
+
"price": 10.493,
|
76 |
},
|
77 |
"g4dn.12xlarge": {
|
78 |
"cloud": "AWS",
|
79 |
"gpu": "4xNVIDIA T4",
|
80 |
"gpuRAM": "64 GB",
|
81 |
"url": "https://instances.vantage.sh/aws/ec2/g4dn.12xlarge",
|
82 |
+
"price": 3.912,
|
83 |
},
|
84 |
"p4d.24xlarge": {
|
85 |
"cloud": "AWS",
|
86 |
"gpu": "4xNVIDIA A100",
|
87 |
"gpuRAM": "320 GB",
|
88 |
"url": "https://instances.vantage.sh/aws/ec2/p4d.24xlarge",
|
89 |
+
"price": 32.773,
|
90 |
},
|
91 |
"p4de.24xlarge": {
|
92 |
"cloud": "AWS",
|
93 |
"gpu": "8xNVIDIA A100",
|
94 |
"gpuRAM": "320 GB",
|
95 |
"url": "https://instances.vantage.sh/aws/ec2/p4de.24xlarge",
|
96 |
+
"price": 40.966,
|
97 |
},
|
98 |
"p5.48xlarge": {
|
99 |
"cloud": "AWS",
|
100 |
"gpu": "8xNVIDIA H100",
|
101 |
"gpuRAM": "640GB",
|
102 |
"url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
|
103 |
+
"price": 98.32,
|
104 |
},
|
105 |
"c6i.xlarge": {
|
106 |
"cloud": "AWS",
|
|
|
113 |
"gpu": "-",
|
114 |
"gpuRAM": "-",
|
115 |
"url": "https://instances.vantage.sh/aws/ec2/c6i.2xlarge",
|
116 |
+
"price": 0.34,
|
117 |
},
|
118 |
"c6i.4xlarge": {
|
119 |
"cloud": "AWS",
|
120 |
"gpu": "-",
|
121 |
"gpuRAM": "-",
|
122 |
"url": "https://instances.vantage.sh/aws/ec2/c6i.4xlarge",
|
123 |
+
"price": 0.68,
|
124 |
},
|
125 |
"c7i.4xlarge": {
|
126 |
"cloud": "AWS",
|
127 |
"gpu": "-",
|
128 |
"gpuRAM": "-",
|
129 |
"url": "https://instances.vantage.sh/aws/ec2/c7i.4xlarge",
|
130 |
+
"price": 0.714,
|
131 |
},
|
132 |
+
"inf2.xlarge": {
|
133 |
"cloud": "AWS",
|
134 |
"gpu": "2xNeuronCore v2",
|
135 |
"gpuRAM": "32 GB",
|
136 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.2xlarge",
|
137 |
+
"price": 0.7582,
|
138 |
},
|
139 |
"inf2.8xlarge": {
|
140 |
"cloud": "AWS",
|
141 |
"gpu": "1xNeuronCore v2",
|
142 |
"gpuRAM": "32 GB",
|
143 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.8xlarge",
|
144 |
+
"price": 1.968,
|
145 |
},
|
146 |
"inf2.24xlarge": {
|
147 |
"cloud": "AWS",
|
148 |
"gpu": "12xNeuronCore v2",
|
149 |
"gpuRAM": "192 GB",
|
150 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.24xlarge",
|
151 |
+
"price": 6.491,
|
152 |
},
|
153 |
"inf2.48xlarge": {
|
154 |
"cloud": "AWS",
|
|
|
156 |
"gpuRAM": "384 GB",
|
157 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
|
158 |
},
|
|
|
159 |
}
|
160 |
|
161 |
results = {
|
results_arcee_agent.py
CHANGED
@@ -41,13 +41,6 @@ results_arcee_agent = {
|
|
41 |
"status": "OK",
|
42 |
"tokensPerSecond": "54.2",
|
43 |
},
|
44 |
-
{
|
45 |
-
"instanceType": "inf2.*",
|
46 |
-
"container": "TGI 2.2.0",
|
47 |
-
"status": "not supported",
|
48 |
-
"tokensPerSecond": "-",
|
49 |
-
"notes": "Qwen2: TGI OK, Neuron SDK KO, optimum-neuron KO",
|
50 |
-
},
|
51 |
{
|
52 |
"instanceType": "g6e.2xlarge",
|
53 |
"configurations": [
|
|
|
41 |
"status": "OK",
|
42 |
"tokensPerSecond": "54.2",
|
43 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
{
|
45 |
"instanceType": "g6e.2xlarge",
|
46 |
"configurations": [
|
results_llama_spark.py
CHANGED
@@ -99,7 +99,7 @@ results_llama_spark = {
|
|
99 |
"notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
|
100 |
},
|
101 |
{
|
102 |
-
"instanceType": "inf2.
|
103 |
"container": "LMI 0.29+transformers-neuronx 0.11.351",
|
104 |
"quantization": "none",
|
105 |
"status": "OK",
|
|
|
99 |
"notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
|
100 |
},
|
101 |
{
|
102 |
+
"instanceType": "inf2.xlarge",
|
103 |
"container": "LMI 0.29+transformers-neuronx 0.11.351",
|
104 |
"quantization": "none",
|
105 |
"status": "OK",
|