Julien Simon committed on
Commit
4cec6db
1 Parent(s): 9e9181b

Add Graviton4

Browse files
results.py CHANGED
@@ -131,32 +131,39 @@ instance_type_mappings = {
131
  },
132
  "c7g.2xlarge": {
133
  "cloud": "AWS",
134
- "gpu": "-",
135
  "gpuRAM": "-",
136
  "url": "https://instances.vantage.sh/aws/ec2/c7g.2xlarge",
137
  "price": 0.29,
138
  },
139
  "c7g.4xlarge": {
140
  "cloud": "AWS",
141
- "gpu": "-",
142
  "gpuRAM": "-",
143
  "url": "https://instances.vantage.sh/aws/ec2/c7g.4xlarge",
144
  "price": 0.58,
145
  },
146
  "c7g.8xlarge": {
147
  "cloud": "AWS",
148
- "gpu": "-",
149
  "gpuRAM": "-",
150
  "url": "https://instances.vantage.sh/aws/ec2/c7g.8xlarge",
151
  "price": 1.16,
152
  },
153
  "c7g.16xlarge": {
154
  "cloud": "AWS",
155
- "gpu": "-",
156
  "gpuRAM": "-",
157
  "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
158
  "price": 2.32,
159
  },
 
 
 
 
 
 
 
160
  "inf2.xlarge": {
161
  "cloud": "AWS",
162
  "gpu": "2xNeuronCore v2",
 
131
  },
132
  "c7g.2xlarge": {
133
  "cloud": "AWS",
134
+ "gpu": "None - Graviton3, 8 vCPUs",
135
  "gpuRAM": "-",
136
  "url": "https://instances.vantage.sh/aws/ec2/c7g.2xlarge",
137
  "price": 0.29,
138
  },
139
  "c7g.4xlarge": {
140
  "cloud": "AWS",
141
+ "gpu": "None - Graviton3, 16 vCPUs",
142
  "gpuRAM": "-",
143
  "url": "https://instances.vantage.sh/aws/ec2/c7g.4xlarge",
144
  "price": 0.58,
145
  },
146
  "c7g.8xlarge": {
147
  "cloud": "AWS",
148
+ "gpu": "None - Graviton3, 32 vCPUs",
149
  "gpuRAM": "-",
150
  "url": "https://instances.vantage.sh/aws/ec2/c7g.8xlarge",
151
  "price": 1.16,
152
  },
153
  "c7g.16xlarge": {
154
  "cloud": "AWS",
155
+ "gpu": "None - Graviton3, 64 vCPUs",
156
  "gpuRAM": "-",
157
  "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
158
  "price": 2.32,
159
  },
160
+ "r8g.16xlarge": {
161
+ "cloud": "AWS",
162
+ "gpu": "None - Graviton4, 64 vCPUs",
163
+ "gpuRAM": "-",
164
+ "url": "https://instances.vantage.sh/aws/ec2/r8g.16xlarge",
165
+ "price": 3.77,
166
+ },
167
  "inf2.xlarge": {
168
  "cloud": "AWS",
169
  "gpu": "2xNeuronCore v2",
results_arcee_supernova.py CHANGED
@@ -12,6 +12,14 @@ results_arcee_supernova = {
12
  "tokensPerSecond": "6.5",
13
  "notes": "",
14
  },
 
 
 
 
 
 
 
 
15
  {
16
  "instanceType": "g5.12xlarge",
17
  "quantization": "awq",
 
12
  "tokensPerSecond": "6.5",
13
  "notes": "",
14
  },
15
+ {
16
+ "instanceType": "r8g.16xlarge",
17
+ "quantization": "Q4_0_4_8",
18
+ "container": "llama.cpp 9/19/24",
19
+ "status": "OK",
20
+ "tokensPerSecond": "25",
21
+ "notes": "",
22
+ },
23
  {
24
  "instanceType": "g5.12xlarge",
25
  "quantization": "awq",
results_llama_supernova_lite.py CHANGED
@@ -12,5 +12,29 @@ results_llama_supernova_lite = {
12
  "tokensPerSecond": "39.7",
13
  "notes": "requantized from Q4_K_S",
14
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ],
16
  }
 
12
  "tokensPerSecond": "39.7",
13
  "notes": "requantized from Q4_K_S",
14
  },
15
+ {
16
+ "instanceType": "c7g.16xlarge",
17
+ "quantization": "Q4_0_8_8",
18
+ "container": "llama.cpp 9/18/24",
19
+ "status": "OK",
20
+ "tokensPerSecond": "45.5",
21
+ "notes": "",
22
+ },
23
+ {
24
+ "instanceType": "r8g.16xlarge",
25
+ "quantization": "Q4_0_4_8",
26
+ "container": "llama.cpp 9/19/24",
27
+ "status": "OK",
28
+ "tokensPerSecond": "65",
29
+ "notes": "",
30
+ },
31
+ {
32
+ "instanceType": "inf2.xlarge",
33
+ "quantization": "Q4_0_8_8",
34
+ "container": "llama.cpp 9/18/24",
35
+ "status": "OK",
36
+ "tokensPerSecond": "39.7",
37
+ "notes": "requantized from Q4_K_S",
38
+ },
39
  ],
40
  }