Julien Simon committed on
Commit
7dff48b
1 Parent(s): 12fc436

Add cost-performance index (CPI)

Browse files
Files changed (3) hide show
  1. results.py +20 -2
  2. results_arcee_agent.py +0 -7
  3. results_llama_spark.py +1 -1
results.py CHANGED
@@ -16,78 +16,91 @@ instance_type_mappings = {
16
  "gpu": "1xNVIDIA A10G",
17
  "gpuRAM": "24 GB",
18
  "url": "https://instances.vantage.sh/aws/ec2/g5.xlarge",
 
19
  },
20
  "g5.2xlarge": {
21
  "cloud": "AWS",
22
  "gpu": "1xNVIDIA A10G",
23
  "gpuRAM": "24 GB",
24
  "url": "https://instances.vantage.sh/aws/ec2/g5.2xlarge",
 
25
  },
26
  "g5.12xlarge": {
27
  "cloud": "AWS",
28
  "gpu": "4xNVIDIA A10G",
29
  "gpuRAM": "96 GB",
30
  "url": "https://instances.vantage.sh/aws/ec2/g5.12xlarge",
 
31
  },
32
  "g5.48xlarge": {
33
  "cloud": "AWS",
34
  "gpu": "8xNVIDIA A10G",
35
  "gpuRAM": "192 GB",
36
  "url": "https://instances.vantage.sh/aws/ec2/g5.48xlarge",
 
37
  },
38
  "g6.2xlarge": {
39
  "cloud": "AWS",
40
  "gpu": "1xNVIDIA L4",
41
  "gpuRAM": "24 GB",
42
  "url": "https://instances.vantage.sh/aws/ec2/g6.2xlarge",
 
43
  },
44
  "g6.12xlarge": {
45
  "cloud": "AWS",
46
  "gpu": "4xNVIDIA L4",
47
  "gpuRAM": "96 GB",
48
  "url": "https://instances.vantage.sh/aws/ec2/g6.12xlarge",
 
49
  },
50
  "g6.48xlarge": {
51
  "cloud": "AWS",
52
  "gpu": "8xNVIDIA L4",
53
  "gpuRAM": "192 GB",
54
  "url": "https://instances.vantage.sh/aws/ec2/g6.48xlarge",
 
55
  },
56
  "g6e.2xlarge": {
57
  "cloud": "AWS",
58
  "gpu": "1xNVIDIA L40S",
59
  "gpuRAM": "48 GB",
60
  "url": "https://instances.vantage.sh/aws/ec2/g6e.2xlarge",
 
61
  },
62
  "g6e.12xlarge": {
63
  "cloud": "AWS",
64
  "gpu": "4xNVIDIA L40S",
65
  "gpuRAM": "192 GB",
66
  "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
 
67
  },
68
  "g4dn.12xlarge": {
69
  "cloud": "AWS",
70
  "gpu": "4xNVIDIA T4",
71
  "gpuRAM": "64 GB",
72
  "url": "https://instances.vantage.sh/aws/ec2/g4dn.12xlarge",
 
73
  },
74
  "p4d.24xlarge": {
75
  "cloud": "AWS",
76
  "gpu": "4xNVIDIA A100",
77
  "gpuRAM": "320 GB",
78
  "url": "https://instances.vantage.sh/aws/ec2/p4d.24xlarge",
 
79
  },
80
  "p4de.24xlarge": {
81
  "cloud": "AWS",
82
  "gpu": "8xNVIDIA A100",
83
  "gpuRAM": "320 GB",
84
  "url": "https://instances.vantage.sh/aws/ec2/p4de.24xlarge",
 
85
  },
86
  "p5.48xlarge": {
87
  "cloud": "AWS",
88
  "gpu": "8xNVIDIA H100",
89
  "gpuRAM": "640GB",
90
  "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
 
91
  },
92
  "c6i.xlarge": {
93
  "cloud": "AWS",
@@ -100,36 +113,42 @@ instance_type_mappings = {
100
  "gpu": "-",
101
  "gpuRAM": "-",
102
  "url": "https://instances.vantage.sh/aws/ec2/c6i.2xlarge",
 
103
  },
104
  "c6i.4xlarge": {
105
  "cloud": "AWS",
106
  "gpu": "-",
107
  "gpuRAM": "-",
108
  "url": "https://instances.vantage.sh/aws/ec2/c6i.4xlarge",
 
109
  },
110
  "c7i.4xlarge": {
111
  "cloud": "AWS",
112
  "gpu": "-",
113
  "gpuRAM": "-",
114
  "url": "https://instances.vantage.sh/aws/ec2/c7i.4xlarge",
 
115
  },
116
- "inf2.2xlarge": {
117
  "cloud": "AWS",
118
  "gpu": "2xNeuronCore v2",
119
  "gpuRAM": "32 GB",
120
  "url": "https://instances.vantage.sh/aws/ec2/inf2.2xlarge",
 
121
  },
122
  "inf2.8xlarge": {
123
  "cloud": "AWS",
124
  "gpu": "1xNeuronCore v2",
125
  "gpuRAM": "32 GB",
126
  "url": "https://instances.vantage.sh/aws/ec2/inf2.8xlarge",
 
127
  },
128
  "inf2.24xlarge": {
129
  "cloud": "AWS",
130
  "gpu": "12xNeuronCore v2",
131
  "gpuRAM": "192 GB",
132
  "url": "https://instances.vantage.sh/aws/ec2/inf2.24xlarge",
 
133
  },
134
  "inf2.48xlarge": {
135
  "cloud": "AWS",
@@ -137,7 +156,6 @@ instance_type_mappings = {
137
  "gpuRAM": "384 GB",
138
  "url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
139
  },
140
- "inf2.*": {"cloud": "AWS", "gpu": "-", "gpuRAM": "-", "url": ""},
141
  }
142
 
143
  results = {
 
16
  "gpu": "1xNVIDIA A10G",
17
  "gpuRAM": "24 GB",
18
  "url": "https://instances.vantage.sh/aws/ec2/g5.xlarge",
19
+ "price": 1.006,
20
  },
21
  "g5.2xlarge": {
22
  "cloud": "AWS",
23
  "gpu": "1xNVIDIA A10G",
24
  "gpuRAM": "24 GB",
25
  "url": "https://instances.vantage.sh/aws/ec2/g5.2xlarge",
26
+ "price": 1.212,
27
  },
28
  "g5.12xlarge": {
29
  "cloud": "AWS",
30
  "gpu": "4xNVIDIA A10G",
31
  "gpuRAM": "96 GB",
32
  "url": "https://instances.vantage.sh/aws/ec2/g5.12xlarge",
33
+ "price": 5.672,
34
  },
35
  "g5.48xlarge": {
36
  "cloud": "AWS",
37
  "gpu": "8xNVIDIA A10G",
38
  "gpuRAM": "192 GB",
39
  "url": "https://instances.vantage.sh/aws/ec2/g5.48xlarge",
40
+ "price": 16.288,
41
  },
42
  "g6.2xlarge": {
43
  "cloud": "AWS",
44
  "gpu": "1xNVIDIA L4",
45
  "gpuRAM": "24 GB",
46
  "url": "https://instances.vantage.sh/aws/ec2/g6.2xlarge",
47
+ "price": 0.9776,
48
  },
49
  "g6.12xlarge": {
50
  "cloud": "AWS",
51
  "gpu": "4xNVIDIA L4",
52
  "gpuRAM": "96 GB",
53
  "url": "https://instances.vantage.sh/aws/ec2/g6.12xlarge",
54
+ "price": 4.602,
55
  },
56
  "g6.48xlarge": {
57
  "cloud": "AWS",
58
  "gpu": "8xNVIDIA L4",
59
  "gpuRAM": "192 GB",
60
  "url": "https://instances.vantage.sh/aws/ec2/g6.48xlarge",
61
+ "price": 13.35,
62
  },
63
  "g6e.2xlarge": {
64
  "cloud": "AWS",
65
  "gpu": "1xNVIDIA L40S",
66
  "gpuRAM": "48 GB",
67
  "url": "https://instances.vantage.sh/aws/ec2/g6e.2xlarge",
68
+ "price": 2.242,
69
  },
70
  "g6e.12xlarge": {
71
  "cloud": "AWS",
72
  "gpu": "4xNVIDIA L40S",
73
  "gpuRAM": "192 GB",
74
  "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
75
+ "price": 10.493,
76
  },
77
  "g4dn.12xlarge": {
78
  "cloud": "AWS",
79
  "gpu": "4xNVIDIA T4",
80
  "gpuRAM": "64 GB",
81
  "url": "https://instances.vantage.sh/aws/ec2/g4dn.12xlarge",
82
+ "price": 3.912,
83
  },
84
  "p4d.24xlarge": {
85
  "cloud": "AWS",
86
  "gpu": "4xNVIDIA A100",
87
  "gpuRAM": "320 GB",
88
  "url": "https://instances.vantage.sh/aws/ec2/p4d.24xlarge",
89
+ "price": 32.773,
90
  },
91
  "p4de.24xlarge": {
92
  "cloud": "AWS",
93
  "gpu": "8xNVIDIA A100",
94
  "gpuRAM": "320 GB",
95
  "url": "https://instances.vantage.sh/aws/ec2/p4de.24xlarge",
96
+ "price": 40.966,
97
  },
98
  "p5.48xlarge": {
99
  "cloud": "AWS",
100
  "gpu": "8xNVIDIA H100",
101
  "gpuRAM": "640GB",
102
  "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
103
+ "price": 98.32,
104
  },
105
  "c6i.xlarge": {
106
  "cloud": "AWS",
 
113
  "gpu": "-",
114
  "gpuRAM": "-",
115
  "url": "https://instances.vantage.sh/aws/ec2/c6i.2xlarge",
116
+ "price": 0.34,
117
  },
118
  "c6i.4xlarge": {
119
  "cloud": "AWS",
120
  "gpu": "-",
121
  "gpuRAM": "-",
122
  "url": "https://instances.vantage.sh/aws/ec2/c6i.4xlarge",
123
+ "price": 0.68,
124
  },
125
  "c7i.4xlarge": {
126
  "cloud": "AWS",
127
  "gpu": "-",
128
  "gpuRAM": "-",
129
  "url": "https://instances.vantage.sh/aws/ec2/c7i.4xlarge",
130
+ "price": 0.714,
131
  },
132
+ "inf2.xlarge": {
133
  "cloud": "AWS",
134
  "gpu": "2xNeuronCore v2",
135
  "gpuRAM": "32 GB",
136
  "url": "https://instances.vantage.sh/aws/ec2/inf2.2xlarge",
137
+ "price": 0.7582,
138
  },
139
  "inf2.8xlarge": {
140
  "cloud": "AWS",
141
  "gpu": "1xNeuronCore v2",
142
  "gpuRAM": "32 GB",
143
  "url": "https://instances.vantage.sh/aws/ec2/inf2.8xlarge",
144
+ "price": 1.968,
145
  },
146
  "inf2.24xlarge": {
147
  "cloud": "AWS",
148
  "gpu": "12xNeuronCore v2",
149
  "gpuRAM": "192 GB",
150
  "url": "https://instances.vantage.sh/aws/ec2/inf2.24xlarge",
151
+ "price": 6.491,
152
  },
153
  "inf2.48xlarge": {
154
  "cloud": "AWS",
 
156
  "gpuRAM": "384 GB",
157
  "url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
158
  },
 
159
  }
160
 
161
  results = {
results_arcee_agent.py CHANGED
@@ -41,13 +41,6 @@ results_arcee_agent = {
41
  "status": "OK",
42
  "tokensPerSecond": "54.2",
43
  },
44
- {
45
- "instanceType": "inf2.*",
46
- "container": "TGI 2.2.0",
47
- "status": "not supported",
48
- "tokensPerSecond": "-",
49
- "notes": "Qwen2: TGI OK, Neuron SDK KO, optimum-neuron KO",
50
- },
51
  {
52
  "instanceType": "g6e.2xlarge",
53
  "configurations": [
 
41
  "status": "OK",
42
  "tokensPerSecond": "54.2",
43
  },
 
 
 
 
 
 
 
44
  {
45
  "instanceType": "g6e.2xlarge",
46
  "configurations": [
results_llama_spark.py CHANGED
@@ -99,7 +99,7 @@ results_llama_spark = {
99
  "notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
100
  },
101
  {
102
- "instanceType": "inf2.2xlarge",
103
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
104
  "quantization": "none",
105
  "status": "OK",
 
99
  "notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
100
  },
101
  {
102
+ "instanceType": "inf2.xlarge",
103
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
104
  "quantization": "none",
105
  "status": "OK",