Julien Simon committed on
Commit
633e287
1 Parent(s): eeaeace
results_arcee_fcv1.py CHANGED
@@ -14,7 +14,7 @@ results_arcee_fcv1 = {
14
  },
15
  {
16
  "instanceType": "g5.12xlarge",
17
- "quantization": "None",
18
  "container": "LMI 0.30+vLLM 0.6.2",
19
  "status": "OK",
20
  "tokensPerSecond": "23",
@@ -22,7 +22,7 @@ results_arcee_fcv1 = {
22
  },
23
  {
24
  "instanceType": "g6.12xlarge",
25
- "quantization": "None",
26
  "container": "LMI 0.30+vLLM 0.6.2",
27
  "status": "OK",
28
  "tokensPerSecond": "14",
@@ -30,7 +30,7 @@ results_arcee_fcv1 = {
30
  },
31
  {
32
  "instanceType": "g6e.12xlarge",
33
- "quantization": "None",
34
  "container": "vLLM0.6.4.post1",
35
  "status": "OK",
36
  "tokensPerSecond": "36",
@@ -38,7 +38,7 @@ results_arcee_fcv1 = {
38
  },
39
  {
40
  "instanceType": "g6e.12xlarge (2 GPUs)",
41
- "quantization": "None",
42
  "container": "vLLM 0.6.4.post1",
43
  "status": "OK",
44
  "tokensPerSecond": "21",
@@ -46,7 +46,7 @@ results_arcee_fcv1 = {
46
  },
47
  {
48
  "instanceType": "p4d.24xlarge",
49
- "quantization": "None",
50
  "container": "LMI 0.30+vLLM 0.6.2",
51
  "status": "OK",
52
  "tokensPerSecond": "72.5",
@@ -54,7 +54,7 @@ results_arcee_fcv1 = {
54
  },
55
  {
56
  "instanceType": "p5.48xlarge",
57
- "quantization": "None",
58
  "container": "vLLM 0.6.4.post1",
59
  "status": "OK",
60
  "tokensPerSecond": "117",
@@ -62,7 +62,7 @@ results_arcee_fcv1 = {
62
  },
63
  {
64
  "instanceType": "p5.48xlarge (4 GPUs)",
65
- "quantization": "None",
66
  "container": "vLLM 0.6.4.post1",
67
  "status": "OK",
68
  "tokensPerSecond": "88",
@@ -70,7 +70,7 @@ results_arcee_fcv1 = {
70
  },
71
  {
72
  "instanceType": "p5.48xlarge (2 GPUs)",
73
- "quantization": "None",
74
  "container": "vLLM 0.6.4.post1",
75
  "status": "OK",
76
  "tokensPerSecond": "58",
@@ -78,7 +78,7 @@ results_arcee_fcv1 = {
78
  },
79
  {
80
  "instanceType": "p5.48xlarge (1 GPU)",
81
- "quantization": "None",
82
  "container": "vLLM 0.6.4.post1",
83
  "status": "OK",
84
  "tokensPerSecond": "38",
 
14
  },
15
  {
16
  "instanceType": "g5.12xlarge",
17
+ "quantization": "none",
18
  "container": "LMI 0.30+vLLM 0.6.2",
19
  "status": "OK",
20
  "tokensPerSecond": "23",
 
22
  },
23
  {
24
  "instanceType": "g6.12xlarge",
25
+ "quantization": "none",
26
  "container": "LMI 0.30+vLLM 0.6.2",
27
  "status": "OK",
28
  "tokensPerSecond": "14",
 
30
  },
31
  {
32
  "instanceType": "g6e.12xlarge",
33
+ "quantization": "none",
34
  "container": "vLLM0.6.4.post1",
35
  "status": "OK",
36
  "tokensPerSecond": "36",
 
38
  },
39
  {
40
  "instanceType": "g6e.12xlarge (2 GPUs)",
41
+ "quantization": "none",
42
  "container": "vLLM 0.6.4.post1",
43
  "status": "OK",
44
  "tokensPerSecond": "21",
 
46
  },
47
  {
48
  "instanceType": "p4d.24xlarge",
49
+ "quantization": "none",
50
  "container": "LMI 0.30+vLLM 0.6.2",
51
  "status": "OK",
52
  "tokensPerSecond": "72.5",
 
54
  },
55
  {
56
  "instanceType": "p5.48xlarge",
57
+ "quantization": "none",
58
  "container": "vLLM 0.6.4.post1",
59
  "status": "OK",
60
  "tokensPerSecond": "117",
 
62
  },
63
  {
64
  "instanceType": "p5.48xlarge (4 GPUs)",
65
+ "quantization": "none",
66
  "container": "vLLM 0.6.4.post1",
67
  "status": "OK",
68
  "tokensPerSecond": "88",
 
70
  },
71
  {
72
  "instanceType": "p5.48xlarge (2 GPUs)",
73
+ "quantization": "none",
74
  "container": "vLLM 0.6.4.post1",
75
  "status": "OK",
76
  "tokensPerSecond": "58",
 
78
  },
79
  {
80
  "instanceType": "p5.48xlarge (1 GPU)",
81
+ "quantization": "none",
82
  "container": "vLLM 0.6.4.post1",
83
  "status": "OK",
84
  "tokensPerSecond": "38",
results_arcee_nova.py CHANGED
@@ -155,7 +155,7 @@ results_arcee_nova = {
155
  },
156
  {
157
  "instanceType": "p5.48xlarge",
158
- "quantization": "None",
159
  "container": "vLLM 0.6.4.post1",
160
  "status": "OK",
161
  "tokensPerSecond": "76",
@@ -163,7 +163,7 @@ results_arcee_nova = {
163
  },
164
  {
165
  "instanceType": "p5.48xlarge (4 GPUs)",
166
- "quantization": "None",
167
  "container": "vLLM 0.6.4.post1",
168
  "status": "OK",
169
  "tokensPerSecond": "51",
@@ -171,7 +171,7 @@ results_arcee_nova = {
171
  },
172
  {
173
  "instanceType": "p5.48xlarge (2 GPUs)",
174
- "quantization": "None",
175
  "container": "vLLM 0.6.4.post1",
176
  "status": "OK",
177
  "tokensPerSecond": "32",
 
155
  },
156
  {
157
  "instanceType": "p5.48xlarge",
158
+ "quantization": "none",
159
  "container": "vLLM 0.6.4.post1",
160
  "status": "OK",
161
  "tokensPerSecond": "76",
 
163
  },
164
  {
165
  "instanceType": "p5.48xlarge (4 GPUs)",
166
+ "quantization": "none",
167
  "container": "vLLM 0.6.4.post1",
168
  "status": "OK",
169
  "tokensPerSecond": "51",
 
171
  },
172
  {
173
  "instanceType": "p5.48xlarge (2 GPUs)",
174
+ "quantization": "none",
175
  "container": "vLLM 0.6.4.post1",
176
  "status": "OK",
177
  "tokensPerSecond": "32",
results_arcee_supernova.py CHANGED
@@ -46,7 +46,7 @@ results_arcee_supernova = {
46
  },
47
  {
48
  "instanceType": "g6e.12xlarge",
49
- "quantization": "None",
50
  "container": "vLLM 0.6.3",
51
  "status": "OK",
52
  "tokensPerSecond": "18.6",
@@ -186,15 +186,15 @@ results_arcee_supernova = {
186
  },
187
  {
188
  "instanceType": "p5.48xlarge",
189
- "quantization": "None",
190
  "container": "vLLM 0.6.4.post1",
191
- "status": "N/A",
192
  "tokensPerSecond": "77",
193
  "notes": "--tensor-parallel-size 8",
194
  },
195
  {
196
  "instanceType": "p5.48xlarge (4 GPUs)",
197
- "quantization": "None",
198
  "container": "vLLM 0.6.4.post1",
199
  "status": "OK",
200
  "tokensPerSecond": "53",
@@ -202,7 +202,7 @@ results_arcee_supernova = {
202
  },
203
  {
204
  "instanceType": "p5.48xlarge (2 GPUs)",
205
- "quantization": "None",
206
  "container": "vLLM 0.6.4.post1",
207
  "status": "OK",
208
  "tokensPerSecond": "33",
 
46
  },
47
  {
48
  "instanceType": "g6e.12xlarge",
49
+ "quantization": "none",
50
  "container": "vLLM 0.6.3",
51
  "status": "OK",
52
  "tokensPerSecond": "18.6",
 
186
  },
187
  {
188
  "instanceType": "p5.48xlarge",
189
+ "quantization": "none",
190
  "container": "vLLM 0.6.4.post1",
191
+ "status": "OK",
192
  "tokensPerSecond": "77",
193
  "notes": "--tensor-parallel-size 8",
194
  },
195
  {
196
  "instanceType": "p5.48xlarge (4 GPUs)",
197
+ "quantization": "none",
198
  "container": "vLLM 0.6.4.post1",
199
  "status": "OK",
200
  "tokensPerSecond": "53",
 
202
  },
203
  {
204
  "instanceType": "p5.48xlarge (2 GPUs)",
205
+ "quantization": "none",
206
  "container": "vLLM 0.6.4.post1",
207
  "status": "OK",
208
  "tokensPerSecond": "33",
results_arcee_supernova_medius.py CHANGED
@@ -22,7 +22,7 @@ results_arcee_supernova_medius = {
22
  },
23
  {
24
  "instanceType": "g5.12xlarge",
25
- "quantization": "None",
26
  "container": "LMI 0.30+vLLM 0.6.2",
27
  "status": "OK",
28
  "tokensPerSecond": "45",
@@ -30,7 +30,7 @@ results_arcee_supernova_medius = {
30
  },
31
  {
32
  "instanceType": "g6.12xlarge",
33
- "quantization": "None",
34
  "container": "LMI 0.30+vLLM 0.6.2",
35
  "status": "OK",
36
  "tokensPerSecond": "29",
@@ -38,7 +38,7 @@ results_arcee_supernova_medius = {
38
  },
39
  {
40
  "instanceType": "g6e.12xlarge",
41
- "quantization": "None",
42
  "container": "vLLM0.6.4.post1",
43
  "status": "OK",
44
  "tokensPerSecond": "70",
@@ -46,7 +46,7 @@ results_arcee_supernova_medius = {
46
  },
47
  {
48
  "instanceType": "g6e.12xlarge (2 GPUs)",
49
- "quantization": "None",
50
  "container": "vLLM 0.6.4.post1",
51
  "status": "OK",
52
  "tokensPerSecond": "43",
@@ -54,7 +54,7 @@ results_arcee_supernova_medius = {
54
  },
55
  {
56
  "instanceType": "p4d.24xlarge",
57
- "quantization": "None",
58
  "container": "LMI 0.30+vLLM 0.6.2",
59
  "status": "OK",
60
  "tokensPerSecond": "108",
@@ -62,7 +62,7 @@ results_arcee_supernova_medius = {
62
  },
63
  {
64
  "instanceType": "p5.48xlarge",
65
- "quantization": "None",
66
  "container": "vLLM 0.6.4.post1",
67
  "status": "OK",
68
  "tokensPerSecond": "162",
@@ -70,7 +70,7 @@ results_arcee_supernova_medius = {
70
  },
71
  {
72
  "instanceType": "p5.48xlarge (4 GPUs)",
73
- "quantization": "None",
74
  "container": "vLLM 0.6.4.post1",
75
  "status": "OK",
76
  "tokensPerSecond": "138",
@@ -78,7 +78,7 @@ results_arcee_supernova_medius = {
78
  },
79
  {
80
  "instanceType": "p5.48xlarge (2 GPUs)",
81
- "quantization": "None",
82
  "container": "vLLM 0.6.4.post1",
83
  "status": "OK",
84
  "tokensPerSecond": "102",
@@ -86,7 +86,7 @@ results_arcee_supernova_medius = {
86
  },
87
  {
88
  "instanceType": "p5.48xlarge (1 GPU)",
89
- "quantization": "None",
90
  "container": "vLLM 0.6.4.post1",
91
  "status": "OK",
92
  "tokensPerSecond": "73",
 
22
  },
23
  {
24
  "instanceType": "g5.12xlarge",
25
+ "quantization": "none",
26
  "container": "LMI 0.30+vLLM 0.6.2",
27
  "status": "OK",
28
  "tokensPerSecond": "45",
 
30
  },
31
  {
32
  "instanceType": "g6.12xlarge",
33
+ "quantization": "none",
34
  "container": "LMI 0.30+vLLM 0.6.2",
35
  "status": "OK",
36
  "tokensPerSecond": "29",
 
38
  },
39
  {
40
  "instanceType": "g6e.12xlarge",
41
+ "quantization": "none",
42
  "container": "vLLM0.6.4.post1",
43
  "status": "OK",
44
  "tokensPerSecond": "70",
 
46
  },
47
  {
48
  "instanceType": "g6e.12xlarge (2 GPUs)",
49
+ "quantization": "none",
50
  "container": "vLLM 0.6.4.post1",
51
  "status": "OK",
52
  "tokensPerSecond": "43",
 
54
  },
55
  {
56
  "instanceType": "p4d.24xlarge",
57
+ "quantization": "none",
58
  "container": "LMI 0.30+vLLM 0.6.2",
59
  "status": "OK",
60
  "tokensPerSecond": "108",
 
62
  },
63
  {
64
  "instanceType": "p5.48xlarge",
65
+ "quantization": "none",
66
  "container": "vLLM 0.6.4.post1",
67
  "status": "OK",
68
  "tokensPerSecond": "162",
 
70
  },
71
  {
72
  "instanceType": "p5.48xlarge (4 GPUs)",
73
+ "quantization": "none",
74
  "container": "vLLM 0.6.4.post1",
75
  "status": "OK",
76
  "tokensPerSecond": "138",
 
78
  },
79
  {
80
  "instanceType": "p5.48xlarge (2 GPUs)",
81
+ "quantization": "none",
82
  "container": "vLLM 0.6.4.post1",
83
  "status": "OK",
84
  "tokensPerSecond": "102",
 
86
  },
87
  {
88
  "instanceType": "p5.48xlarge (1 GPU)",
89
+ "quantization": "none",
90
  "container": "vLLM 0.6.4.post1",
91
  "status": "OK",
92
  "tokensPerSecond": "73",