IlyasMoutawwakil committed
Commit 9a3f7b4 Β· 1 Parent(s): a830adb
Files changed (2)
  1. app.py +25 -10
  2. src/utils.py +15 -15
app.py CHANGED
@@ -20,7 +20,6 @@ from src.assets.text_content import (
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 LOGO_URL = "https://huggingface.co/spaces/optimum/llm-perf-leaderboard/resolve/main/huggy_bench.png"
 LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
-MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB πŸ–₯️"}
 ALL_COLUMNS_MAPPING = {
     "Model": "Model πŸ€—",
     "Arch": "Arch πŸ›οΈ",
@@ -42,7 +41,7 @@ ALL_COLUMNS_MAPPING = {
     "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) ⬇️",
 }
 SORTING_COLUMN = ["Score", "generate.throughput(tokens/s)"]
-SORTING_ASCENDING = [False, True]
+SORTING_ASCENDING = [False, False]
 ALL_COLUMNS_DATATYPES = [
     # open llm
     "markdown",
@@ -73,24 +72,25 @@ hf_hub_download(
     repo_type="dataset",
     token=HF_TOKEN,
 )
-OPEN_LLM = pd.read_csv("dataset/open-llm.csv")
+OPEN_LLM_DF = pd.read_csv("dataset/open-llm.csv")
 
-MACHINE_TO_DATAFRAME = {}
+MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB πŸ–₯️"}
+MACHINE_TO_PERF = {}
 for machine in MACHINE_TO_HARDWARE:
     hf_hub_download(
         repo_id="optimum/llm-perf-dataset",
-        filename=f"{machine}/full-report.csv",
+        filename=f"{machine}/perf-report.csv",
         local_dir="dataset",
         repo_type="dataset",
         token=HF_TOKEN,
     )
-    MACHINE_TO_DATAFRAME[machine] = pd.read_csv(f"dataset/{machine}/full-report.csv")
+    MACHINE_TO_PERF[machine] = pd.read_csv(f"dataset/{machine}/perf-report.csv")
 
 
 def get_benchmark_df(machine="hf-dgx-01"):
     # merge on model
-    llm_perf = MACHINE_TO_DATAFRAME[machine].copy()
-    merged_df = OPEN_LLM.merge(llm_perf, left_on="Model", right_on="model")
+    machine_perf_df = MACHINE_TO_PERF[machine].copy()
+    merged_df = OPEN_LLM_DF.merge(machine_perf_df, left_on="Model", right_on="model")
     # transpose energy consumption
     merged_df["generate.energy_consumption(tokens/kWh)"] = (
         1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
@@ -110,8 +110,23 @@ def get_benchmark_df(machine="hf-dgx-01"):
         axis=1,
     )
     # add quantization scheme
-    merged_df["quantization"] = merged_df["backend.quantization_scheme"].apply(
-        lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
+    merged_df["quantization"] = merged_df[
+        ["backend.quantization_scheme", "backend.quantization_config.exllama_config.version"]
+    ].apply(
+        lambda x: "BnB.4bit"
+        if x["backend.quantization_scheme"] == "bnb"
+        else (
+            "GPTQ.4bit+ExllamaV1"
+            if (x["backend.quantization_scheme"] == "gptq")
+            and (x["backend.quantization_config.exllama_config.version"] == 1)
+            else (
+                "GPTQ.4bit+ExllamaV2"
+                if (x["backend.quantization_scheme"] == "gptq")
+                and (x["backend.quantization_config.exllama_config.version"] == 2)
+                else "None"
+            )
+        ),
+        axis=1,
     )
     # add decode throughput
     merged_df["decode.throughput(tokens/s)"] = (
src/utils.py CHANGED
@@ -18,25 +18,25 @@ def change_tab(query_param):
 
 
 LLM_MODEL_ARCHS = {
-    "mixformer-sequential": "Phi Ο†",
-    "RefinedWebModel": "Falcon πŸ¦…",
-    "gpt_bigcode": "StarCoder ⭐",
-    "RefinedWeb": "Falcon πŸ¦…",
-    "baichuan": "Baichuan 🌊",
-    "mistral": "Mistral Ⓜ️",
-    "codegen": "CodeGen ♾️",
-    "falcon": "Falcon πŸ¦…",
-    "bloom": "Bloom 🌸",
-    "llama": "LLaMA πŸ¦™",
-    "mpt": "MPT 🧱",
-    "Yi": "Yi δΊΊ",
+    "stablelm_epoch": "πŸ”΄ StableLM-Epoch",
+    "stablelm_alpha": "πŸ”΄ StableLM-Alpha",
+    "mixformer-sequential": "Ο† Phi",
+    "RefinedWebModel": "πŸ¦… Falcon",
+    "gpt_bigcode": "⭐ StarCoder",
+    "RefinedWeb": "πŸ¦… Falcon",
+    "baichuan": "🌊 Baichuan",
+    "mistral": "Ⓜ️ Mistral",
+    "codegen": "♾️ CodeGen",
+    "falcon": "πŸ¦… Falcon",
+    "bloom": "🌸 Bloom",
+    "llama": "πŸ¦™ LLaMA",
+    "mpt": "🧱 MPT",
+    "Yi": "δΊΊ Yi",
     # suggest something
-    "stablelm_epoch": "StableLM-Epoch",
-    "stablelm_alpha": "StableLM-Alpha",
     "gpt_neox": "GPT-NeoX",
+    "internlm": "InternLM",
     "gpt_neo": "GPT-Neo",
     "chatglm": "ChatGLM",
-    "internlm": "InternLM",
     "gpt2": "GPT-2",
     "gptj": "GPT-J",
     "xglm": "XGLM",