Benchmarks / results_llama_supernova_lite.py
Julien Simon
Add SuperNova Lite on r8g.8xlarge
31ed7d9
raw
history blame
1.21 kB
"""Module containing performance results for the Llama-3.1-SuperNova-Lite model."""
# Performance results for Llama-3.1-SuperNova-Lite, one record per measured
# configuration. Every value, including tokensPerSecond, is stored as a string.
results_llama_supernova_lite = {
    "name": "Llama-3.1-SuperNova-Lite",
    "modelType": "Llama 3.1 8B",
    "configurations": [
        {
            "instanceType": instance_type,
            "quantization": quantization,
            "container": container,
            "status": status,
            "tokensPerSecond": tokens_per_second,
            "notes": notes,
        }
        # Row layout: instance type, quantization, container, status,
        # tokens per second, notes (empty string when there is nothing to add).
        for (
            instance_type,
            quantization,
            container,
            status,
            tokens_per_second,
            notes,
        ) in (
            (
                "c7g.8xlarge", "Q4_0_8_8", "llama.cpp 9/18/24", "OK", "39.7",
                "requantized from Q4_K_S",
            ),
            ("c7g.16xlarge", "Q4_0_8_8", "llama.cpp 9/18/24", "OK", "45.5", ""),
            ("r8g.8xlarge", "Q4_0_4_8", "llama.cpp 9/11/24", "OK", "57", ""),
            ("r8g.16xlarge", "Q4_0_4_8", "llama.cpp 9/11/24", "OK", "65", ""),
        )
    ],
}