IlyasMoutawwakil committed
Commit 9a3f7b4 Β· 1 Parent(s): a830adb
Files changed (2)
  1. app.py +25 -10
  2. src/utils.py +15 -15
app.py CHANGED
@@ -20,7 +20,6 @@ from src.assets.text_content import (
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 LOGO_URL = "https://huggingface.co/spaces/optimum/llm-perf-leaderboard/resolve/main/huggy_bench.png"
 LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
-MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB πŸ–₯️"}
 ALL_COLUMNS_MAPPING = {
     "Model": "Model πŸ€—",
     "Arch": "Arch πŸ›οΈ",
@@ -42,7 +41,7 @@ ALL_COLUMNS_MAPPING = {
     "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) ⬇️",
 }
 SORTING_COLUMN = ["Score", "generate.throughput(tokens/s)"]
-SORTING_ASCENDING = [False, True]
+SORTING_ASCENDING = [False, False]
 ALL_COLUMNS_DATATYPES = [
     # open llm
     "markdown",
@@ -73,24 +72,25 @@ hf_hub_download(
     repo_type="dataset",
     token=HF_TOKEN,
 )
-OPEN_LLM = pd.read_csv("dataset/open-llm.csv")
+OPEN_LLM_DF = pd.read_csv("dataset/open-llm.csv")
 
-MACHINE_TO_DATAFRAME = {}
+MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB πŸ–₯️"}
+MACHINE_TO_PERF = {}
 for machine in MACHINE_TO_HARDWARE:
     hf_hub_download(
         repo_id="optimum/llm-perf-dataset",
-        filename=f"{machine}/full-report.csv",
+        filename=f"{machine}/perf-report.csv",
         local_dir="dataset",
         repo_type="dataset",
         token=HF_TOKEN,
     )
-    MACHINE_TO_DATAFRAME[machine] = pd.read_csv(f"dataset/{machine}/full-report.csv")
+    MACHINE_TO_PERF[machine] = pd.read_csv(f"dataset/{machine}/perf-report.csv")
 
 
 def get_benchmark_df(machine="hf-dgx-01"):
     # merge on model
-    llm_perf = MACHINE_TO_DATAFRAME[machine].copy()
-    merged_df = OPEN_LLM.merge(llm_perf, left_on="Model", right_on="model")
+    machine_perf_df = MACHINE_TO_PERF[machine].copy()
+    merged_df = OPEN_LLM_DF.merge(machine_perf_df, left_on="Model", right_on="model")
     # transpose energy consumption
     merged_df["generate.energy_consumption(tokens/kWh)"] = (
         1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
@@ -110,8 +110,23 @@ def get_benchmark_df(machine="hf-dgx-01"):
         axis=1,
     )
     # add quantization scheme
-    merged_df["quantization"] = merged_df["backend.quantization_scheme"].apply(
-        lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
+    merged_df["quantization"] = merged_df[
+        ["backend.quantization_scheme", "backend.quantization_config.exllama_config.version"]
+    ].apply(
+        lambda x: "BnB.4bit"
+        if x["backend.quantization_scheme"] == "bnb"
+        else (
+            "GPTQ.4bit+ExllamaV1"
+            if (x["backend.quantization_scheme"] == "gptq")
+            and (x["backend.quantization_config.exllama_config.version"] == 1)
+            else (
+                "GPTQ.4bit+ExllamaV2"
+                if (x["backend.quantization_scheme"] == "gptq")
+                and (x["backend.quantization_config.exllama_config.version"] == 2)
+                else "None"
+            )
+        ),
+        axis=1,
     )
     # add decode throughput
     merged_df["decode.throughput(tokens/s)"] = (
src/utils.py CHANGED
@@ -18,25 +18,25 @@ def change_tab(query_param):
 
 
 LLM_MODEL_ARCHS = {
-    "mixformer-sequential": "Phi Ο†",
-    "RefinedWebModel": "Falcon πŸ¦…",
-    "gpt_bigcode": "StarCoder ⭐",
-    "RefinedWeb": "Falcon πŸ¦…",
-    "baichuan": "Baichuan 🌊",
-    "mistral": "Mistral Ⓜ️",
-    "codegen": "CodeGen ♾️",
-    "falcon": "Falcon πŸ¦…",
-    "bloom": "Bloom 🌸",
-    "llama": "LLaMA πŸ¦™",
-    "mpt": "MPT 🧱",
-    "Yi": "Yi δΊΊ",
+    "stablelm_epoch": "πŸ”΄ StableLM-Epoch",
+    "stablelm_alpha": "πŸ”΄ StableLM-Alpha",
+    "mixformer-sequential": "Ο† Phi",
+    "RefinedWebModel": "πŸ¦… Falcon",
+    "gpt_bigcode": "⭐ StarCoder",
+    "RefinedWeb": "πŸ¦… Falcon",
+    "baichuan": "🌊 Baichuan",
+    "mistral": "Ⓜ️ Mistral",
+    "codegen": "♾️ CodeGen",
+    "falcon": "πŸ¦… Falcon",
+    "bloom": "🌸 Bloom",
+    "llama": "πŸ¦™ LLaMA",
+    "mpt": "🧱 MPT",
+    "Yi": "δΊΊ Yi",
     # suggest something
-    "stablelm_epoch": "StableLM-Epoch",
-    "stablelm_alpha": "StableLM-Alpha",
     "gpt_neox": "GPT-NeoX",
+    "internlm": "InternLM",
     "gpt_neo": "GPT-Neo",
     "chatglm": "ChatGLM",
-    "internlm": "InternLM",
     "gpt2": "GPT-2",
     "gptj": "GPT-J",
     "xglm": "XGLM",