Julien Simon committed on
Commit 86d92de · 1 Parent(s): 10318fd

Remove FCV1

Files changed (3)
  1. results.py +0 -2
  2. results_arcee_fcv1.py +0 -88
  3. results_virtuoso_medium.py +82 -1
results.py CHANGED
@@ -1,7 +1,6 @@
  """Module containing model configuration results for various AI models and hardware setups."""

  from results_arcee_agent import results_arcee_agent
- from results_arcee_fcv1 import results_arcee_fcv1
  from results_arcee_lite import results_arcee_lite
  from results_arcee_meraj import results_arcee_meraj
  from results_arcee_nova import results_arcee_nova
@@ -281,7 +280,6 @@ results = {
  results_arcee_scribe,
  results_llama_supernova_lite,
  results_arcee_supernova_medius,
- results_arcee_fcv1,
  results_virtuoso_small,
  results_virtuoso_medium,
  results_virtuoso_large,
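
Each results_*.py module exports a single dict with "name", "modelType", and "configurations" keys, and results.py only aggregates those dicts, so dropping a model is exactly the two-line removal shown above. A minimal consumer-side sketch, assuming the aggregate is an iterable of model dicts (the get_model helper and the one-element list are hypothetical, not part of the repository):

# Minimal sketch, assuming results.py exposes an iterable of per-model
# dicts shaped like those in this commit; get_model is hypothetical.
from results_virtuoso_medium import results_virtuoso_medium

results = [results_virtuoso_medium]  # stand-in for the full aggregate

def get_model(name):
    """Return the result dict for a model by name, or None if absent."""
    return next((m for m in results if m["name"] == name), None)

print(get_model("Virtuoso-Medium")["modelType"])  # -> Qwen2.5 32B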
results_arcee_fcv1.py DELETED
@@ -1,88 +0,0 @@
- """Module containing performance results for the Arcee-FCV1 model."""
-
- results_arcee_fcv1 = {
- "name": "Arcee-FCV1",
- "modelType": "Qwen2.5 32B",
- "configurations": [
- {
- "instanceType": "r8g.4xlarge",
- "quantization": "Q4_0_4_8",
- "container": "llama.cpp 11/27/24",
- "status": "OK",
- "tokensPerSecond": "10.5",
- "notes": "-fa",
- },
- {
- "instanceType": "g5.12xlarge",
- "quantization": "none",
- "container": "LMI 0.30+vLLM 0.6.2",
- "status": "OK",
- "tokensPerSecond": "23",
- "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
- },
- {
- "instanceType": "g6.12xlarge",
- "quantization": "none",
- "container": "LMI 0.30+vLLM 0.6.2",
- "status": "OK",
- "tokensPerSecond": "14",
- "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
- },
- {
- "instanceType": "g6e.12xlarge",
- "quantization": "none",
- "container": "vLLM0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "36",
- "notes": "--tensor-parallel-size 4 --max-model-len 16384",
- },
- {
- "instanceType": "g6e.12xlarge (2 GPUs)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "21",
- "notes": "--tensor-parallel-size 2 --max-model-len 16384",
- },
- {
- "instanceType": "p4d.24xlarge",
- "quantization": "none",
- "container": "LMI 0.30+vLLM 0.6.2",
- "status": "OK",
- "tokensPerSecond": "72.5",
- "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
- },
- {
- "instanceType": "p5.48xlarge",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "117",
- "notes": "--tensor-parallel-size 8",
- },
- {
- "instanceType": "p5.48xlarge (4 GPUs)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "88",
- "notes": "--tensor-parallel-size 4",
- },
- {
- "instanceType": "p5.48xlarge (2 GPUs)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "58",
- "notes": "--tensor-parallel-size 2",
- },
- {
- "instanceType": "p5.48xlarge (1 GPU)",
- "quantization": "none",
- "container": "vLLM 0.6.4.post1",
- "status": "OK",
- "tokensPerSecond": "38",
- "notes": "--tensor-parallel-size 1",
- },
- ],
- }
results_virtuoso_medium.py CHANGED
@@ -3,5 +3,86 @@
  results_virtuoso_medium = {
  "name": "Virtuoso-Medium",
  "modelType": "Qwen2.5 32B",
- "configurations": [],
+ "configurations": [
+ {
+ "instanceType": "r8g.4xlarge",
+ "quantization": "Q4_0_4_8",
+ "container": "llama.cpp 11/27/24",
+ "status": "OK",
+ "tokensPerSecond": "10.5",
+ "notes": "-fa",
+ },
+ {
+ "instanceType": "g5.12xlarge",
+ "quantization": "none",
+ "container": "LMI 0.30+vLLM 0.6.2",
+ "status": "OK",
+ "tokensPerSecond": "23",
+ "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
+ },
+ {
+ "instanceType": "g6.12xlarge",
+ "quantization": "none",
+ "container": "LMI 0.30+vLLM 0.6.2",
+ "status": "OK",
+ "tokensPerSecond": "14",
+ "notes": '"OPTION_MAX_MODEL_LEN": "16384",\n"TENSOR_PARALLEL_DEGREE": "max",',
+ },
+ {
+ "instanceType": "g6e.12xlarge",
+ "quantization": "none",
+ "container": "vLLM0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "36",
+ "notes": "--tensor-parallel-size 4 --max-model-len 16384",
+ },
+ {
+ "instanceType": "g6e.12xlarge (2 GPUs)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "21",
+ "notes": "--tensor-parallel-size 2 --max-model-len 16384",
+ },
+ {
+ "instanceType": "p4d.24xlarge",
+ "quantization": "none",
+ "container": "LMI 0.30+vLLM 0.6.2",
+ "status": "OK",
+ "tokensPerSecond": "72.5",
+ "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
+ },
+ {
+ "instanceType": "p5.48xlarge",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "117",
+ "notes": "--tensor-parallel-size 8",
+ },
+ {
+ "instanceType": "p5.48xlarge (4 GPUs)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "88",
+ "notes": "--tensor-parallel-size 4",
+ },
+ {
+ "instanceType": "p5.48xlarge (2 GPUs)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "58",
+ "notes": "--tensor-parallel-size 2",
+ },
+ {
+ "instanceType": "p5.48xlarge (1 GPU)",
+ "quantization": "none",
+ "container": "vLLM 0.6.4.post1",
+ "status": "OK",
+ "tokensPerSecond": "38",
+ "notes": "--tensor-parallel-size 1",
+ },
+ ],
  }
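
Note that every configuration stores tokensPerSecond as a string, so downstream code must convert it before comparing. A minimal post-processing sketch, assuming numeric strings throughout (the fastest_configuration helper is hypothetical, not part of the repository):

# Minimal sketch, not part of the repository: rank the configurations of
# a model dict shaped like results_virtuoso_medium. Assumes every
# "tokensPerSecond" value is a numeric string, as in this commit.
def fastest_configuration(model):
    """Return the configuration with the highest tokens-per-second."""
    return max(model["configurations"], key=lambda c: float(c["tokensPerSecond"]))

# For the data added above, this returns the 8-GPU p5.48xlarge entry (117 tok/s).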