Julien Simon committed on
Commit
534766c
1 Parent(s): d0c0894

More SuperNova

Browse files
results_arcee_supernova.py CHANGED
@@ -50,7 +50,7 @@ results_arcee_supernova = {
50
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
51
  "status": "???",
52
  "tokensPerSecond": "???",
53
- "notes": "bs=2,seqlen=8192 - SDK 2.19.1",
54
  },
55
  ],
56
  },
@@ -83,7 +83,7 @@ results_arcee_supernova = {
83
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
84
  "status": "OK",
85
  "tokensPerSecond": "14",
86
- "notes": "bs=2,seqlen=8192 - SDK 2.19.1",
87
  },
88
  ],
89
  },
@@ -102,7 +102,7 @@ results_arcee_supernova = {
102
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
103
  "status": "OK",
104
  "tokensPerSecond": "24.6",
105
- "notes": "bs=2,seqlen=8192 - SDK 2.19.1",
106
  },
107
  ],
108
  },
 
50
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
51
  "status": "???",
52
  "tokensPerSecond": "???",
53
+ "notes": "bs=2,seqlen=8192 - SDK 2.19.1 - OPTION_LOAD_IN_8BIT=True",
54
  },
55
  ],
56
  },
 
83
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
84
  "status": "OK",
85
  "tokensPerSecond": "14",
86
+ "notes": "bs=2,seqlen=8192 - SDK 2.19.1 - OPTION_LOAD_IN_8BIT=True",
87
  },
88
  ],
89
  },
 
102
  "container": "LMI 0.29+transformers-neuronx 0.11.351",
103
  "status": "OK",
104
  "tokensPerSecond": "24.6",
105
+ "notes": "bs=2,seqlen=8192 - SDK 2.19.1 - OPTION_LOAD_IN_8BIT=True",
106
  },
107
  ],
108
  },
results_llama_supernova_lite.py CHANGED
@@ -4,6 +4,13 @@ results_llama_supernova_lite = {
4
  "name": "Llama-3.1-SuperNova-Lite",
5
  "modelType": "Llama 3.1 8B",
6
  "configurations": [
7
- {},
 
 
 
 
 
 
 
8
  ],
9
  }
 
4
  "name": "Llama-3.1-SuperNova-Lite",
5
  "modelType": "Llama 3.1 8B",
6
  "configurations": [
7
+ {
8
+ "instanceType": "c7g.8xlarge",
9
+ "quantization": "Q4_0_8_8",
10
+ "container": "llama.cpp 9/18/24",
11
+ "status": "OK",
12
+ "tokensPerSecond": "39.7",
13
+ "notes": "requantized from Q4_K_S",
14
+ },
15
  ],
16
  }