Spaces:
Running
Running
Commit
·
bb5689a
1
Parent(s):
a1135a9
fix decode throughput
Browse files
- src/latency_score_memory.py +1 -1
- src/llm_perf.py +0 -4
src/latency_score_memory.py
CHANGED
@@ -42,7 +42,7 @@ def get_lat_score_mem_fig(llm_perf_df):
|
|
42 |
"xanchor": "center",
|
43 |
"yanchor": "top",
|
44 |
},
|
45 |
-
xaxis_title="Per
|
46 |
yaxis_title="Open LLM Score (%)",
|
47 |
legend_title="LLM Architecture",
|
48 |
width=1200,
|
|
|
42 |
"xanchor": "center",
|
43 |
"yanchor": "top",
|
44 |
},
|
45 |
+
xaxis_title="Per 256 Tokens Latency (s)",
|
46 |
yaxis_title="Open LLM Score (%)",
|
47 |
legend_title="LLM Architecture",
|
48 |
width=1200,
|
src/llm_perf.py
CHANGED
@@ -101,10 +101,6 @@ def get_llm_perf_df(machine: str = "hf-dgx-01"):
|
|
101 |
].apply(lambda x: process_quantization_scheme(x), axis=1)
|
102 |
# add arch
|
103 |
llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
|
104 |
-
# add decode throughput
|
105 |
-
llm_perf_df["decode.throughput(tokens/s)"] = (
|
106 |
-
1000 / (llm_perf_df["generate.latency(s)"] - llm_perf_df["forward.latency(s)"])
|
107 |
-
).round(2)
|
108 |
# filter columns
|
109 |
llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
|
110 |
# rename columns
|
|
|
101 |
].apply(lambda x: process_quantization_scheme(x), axis=1)
|
102 |
# add arch
|
103 |
llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
|
|
|
|
|
|
|
|
|
104 |
# filter columns
|
105 |
llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
|
106 |
# rename columns
|