Commit 9a3f7b4
1 Parent(s): a830adb

update

Files changed:
- app.py +25 -10
- src/utils.py +15 -15
app.py
CHANGED
@@ -20,7 +20,6 @@ from src.assets.text_content import (
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 LOGO_URL = "https://huggingface.co/spaces/optimum/llm-perf-leaderboard/resolve/main/huggy_bench.png"
 LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
-MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
 ALL_COLUMNS_MAPPING = {
     "Model": "Model 🤗",
     "Arch": "Arch 🏛️",
@@ -42,7 +41,7 @@ ALL_COLUMNS_MAPPING = {
     "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) ⬆️",
 }
 SORTING_COLUMN = ["Score", "generate.throughput(tokens/s)"]
-SORTING_ASCENDING = [False,
+SORTING_ASCENDING = [False, False]
 ALL_COLUMNS_DATATYPES = [
     # open llm
     "markdown",
@@ -73,24 +72,25 @@ hf_hub_download(
     repo_type="dataset",
     token=HF_TOKEN,
 )
-
+OPEN_LLM_DF = pd.read_csv("dataset/open-llm.csv")
 
-
+MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
+MACHINE_TO_PERF = {}
 for machine in MACHINE_TO_HARDWARE:
     hf_hub_download(
         repo_id="optimum/llm-perf-dataset",
-        filename=f"{machine}/
+        filename=f"{machine}/perf-report.csv",
         local_dir="dataset",
         repo_type="dataset",
         token=HF_TOKEN,
     )
-
+    MACHINE_TO_PERF[machine] = pd.read_csv(f"dataset/{machine}/perf-report.csv")
 
 
 def get_benchmark_df(machine="hf-dgx-01"):
     # merge on model
-
-    merged_df =
+    machine_perf_df = MACHINE_TO_PERF[machine].copy()
+    merged_df = OPEN_LLM_DF.merge(machine_perf_df, left_on="Model", right_on="model")
     # transpose energy consumption
     merged_df["generate.energy_consumption(tokens/kWh)"] = (
         1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
@@ -110,8 +110,23 @@ def get_benchmark_df(machine="hf-dgx-01"):
         axis=1,
     )
     # add quantization scheme
-    merged_df["quantization"] = merged_df[
-
+    merged_df["quantization"] = merged_df[
+        ["backend.quantization_scheme", "backend.quantization_config.exllama_config.version"]
+    ].apply(
+        lambda x: "BnB.4bit"
+        if x["backend.quantization_scheme"] == "bnb"
+        else (
+            "GPTQ.4bit+ExllamaV1"
+            if (x["backend.quantization_scheme"] == "gptq")
+            and (x["backend.quantization_config.exllama_config.version"] == 1)
+            else (
+                "GPTQ.4bit+ExllamaV2"
+                if (x["backend.quantization_scheme"] == "gptq")
+                and (x["backend.quantization_config.exllama_config.version"] == 2)
+                else "None"
+            )
+        ),
+        axis=1,
+    )
     # add decode throughput
     merged_df["decode.throughput(tokens/s)"] = (
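Note on the new get_benchmark_df logic: the energy "transpose" is just a reciprocal, tokens/kWh = 1 / (kWh/token), with missing values filled as 1 before inversion, and the quantization column is derived from two backend config columns. Below is a minimal standalone sketch of that labeling rule on a toy DataFrame; the column names come from the diff, but the rows are made up for illustration.

import pandas as pd

# Toy rows mimicking the two backend config columns used in the diff;
# the values are illustrative, not real benchmark data.
df = pd.DataFrame(
    {
        "backend.quantization_scheme": ["bnb", "gptq", "gptq", None],
        "backend.quantization_config.exllama_config.version": [None, 1, 2, None],
    }
)

def quantization_label(row):
    # Same decision tree as the lambda added in get_benchmark_df.
    scheme = row["backend.quantization_scheme"]
    exllama = row["backend.quantization_config.exllama_config.version"]
    if scheme == "bnb":
        return "BnB.4bit"
    if scheme == "gptq" and exllama == 1:
        return "GPTQ.4bit+ExllamaV1"
    if scheme == "gptq" and exllama == 2:
        return "GPTQ.4bit+ExllamaV2"
    return "None"

df["quantization"] = df.apply(quantization_label, axis=1)
print(df["quantization"].tolist())
# ['BnB.4bit', 'GPTQ.4bit+ExllamaV1', 'GPTQ.4bit+ExllamaV2', 'None']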
src/utils.py
CHANGED
@@ -18,25 +18,25 @@ def change_tab(query_param):
 
 
 LLM_MODEL_ARCHS = {
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
+    "stablelm_epoch": "🔴 StableLM-Epoch",
+    "stablelm_alpha": "🔴 StableLM-Alpha",
+    "mixformer-sequential": "φ Phi",
+    "RefinedWebModel": "🦅 Falcon",
+    "gpt_bigcode": "⭐ StarCoder",
+    "RefinedWeb": "🦅 Falcon",
+    "baichuan": "🌊 Baichuan",
+    "mistral": "Ⓜ️ Mistral",
+    "codegen": "♾️ CodeGen",
+    "falcon": "🦅 Falcon",
+    "bloom": "🌸 Bloom",
+    "llama": "🦙 LLaMA",
+    "mpt": "🧱 MPT",
+    "Yi": "人 Yi",
     # suggest something
-    "stablelm_epoch": "StableLM-Epoch",
-    "stablelm_alpha": "StableLM-Alpha",
     "gpt_neox": "GPT-NeoX",
+    "internlm": "InternLM",
     "gpt_neo": "GPT-Neo",
     "chatglm": "ChatGLM",
-    "internlm": "InternLM",
     "gpt2": "GPT-2",
     "gptj": "GPT-J",
     "xglm": "XGLM",
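For context, LLM_MODEL_ARCHS maps architecture identifiers (as reported in model configs) to curated display names. A hedged sketch of how such a map is typically consumed; the arch_label helper below is hypothetical, not part of this repo.

# Hypothetical usage sketch (not from this repo): look up a curated
# display name for an architecture key, falling back to the raw key
# for architectures that have no label yet.
LLM_MODEL_ARCHS = {
    "llama": "🦙 LLaMA",
    "gpt_bigcode": "⭐ StarCoder",
    "falcon": "🦅 Falcon",
}

def arch_label(arch):
    return LLM_MODEL_ARCHS.get(arch, arch)

print(arch_label("llama"))     # 🦙 LLaMA
print(arch_label("new_arch"))  # new_arch (unmapped, shown as-is)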