# Scraped from a Hugging Face Space file view (page chrome: "raw", "history", "blame").
# Author shown: joaogante (HF staff).
# Commit 7e13cda: "update generation type names". File size: 10.9 kB.
import matplotlib
matplotlib.use('Agg')
import functools
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Benchmark results, nested as: generation type -> model -> GPU -> timings.
# Each timing list is ordered [PyTorch, TF eager, TF XLA]; units = ms.
# NOTE(review): [0, 0, 0] entries look like "no measurement available" placeholders
# (presumably the model could not be run on that GPU) -- confirm; they are plotted as 0.
BENCHMARK_DATA = {
    "Greedy Decoding": {
        "DistilGPT2": {
            "T4": [336.22, 3976.23, 115.84],
            "3090": [158.38, 1835.82, 46.56],
            "A100": [371.49, 4073.84, 60.94],
        },
        "GPT2": {
            "T4": [607.31, 7140.23, 185.12],
            "3090": [297.03, 3308.31, 76.68],
            "A100": [691.75, 7323.60, 110.72],
        },
        "OPT-1.3B": {
            "T4": [1303.41, 15939.07, 1488.15],
            "3090": [428.33, 7259.43, 468.37],
            "A100": [1125.00, 16713.63, 384.52],
        },
        "GPTJ-6B": {
            "T4": [0, 0, 0],
            "3090": [0, 0, 0],
            "A100": [2664.28, 32783.09, 1440.06],
        },
        "T5 Small": {
            "T4": [99.88, 1527.73, 18.78],
            "3090": [55.09, 665.70, 9.25],
            "A100": [124.91, 1642.07, 13.72],
        },
        "T5 Base": {
            "T4": [416.56, 6095.05, 106.12],
            "3090": [223.00, 2503.28, 46.67],
            "A100": [550.76, 6504.11, 64.57],
        },
        "T5 Large": {
            "T4": [645.05, 9587.67, 225.17],
            "3090": [377.74, 4216.41, 97.92],
            "A100": [944.17, 10572.43, 116.52],
        },
        "T5 3B": {
            "T4": [1493.61, 13629.80, 1494.80],
            "3090": [694.75, 6316.79, 489.33],
            "A100": [1801.68, 16707.71, 411.93],
        },
    },
    "Sampling": {
        "DistilGPT2": {
            "T4": [617.40, 6078.81, 221.65],
            "3090": [310.37, 2843.73, 85.44],
            "A100": [729.05, 7140.05, 121.83],
        },
        "GPT2": {
            "T4": [1205.34, 12256.98, 378.69],
            "3090": [577.12, 5637.11, 160.02],
            "A100": [1377.68, 15605.72, 234.47],
        },
        "OPT-1.3B": {
            "T4": [2166.72, 19126.25, 2341.32],
            "3090": [706.50, 9616.97, 731.58],
            "A100": [2019.70, 28621.09, 690.36],
        },
        "GPTJ-6B": {
            "T4": [0, 0, 0],
            "3090": [0, 0, 0],
            "A100": [5150.35, 70554.07, 2744.49],
        },
        "T5 Small": {
            "T4": [235.93, 3599.47, 41.07],
            "3090": [100.41, 1093.33, 23.24],
            "A100": [267.42, 3366.73, 28.53],
        },
        "T5 Base": {
            "T4": [812.59, 7966.73, 196.85],
            "3090": [407.81, 4904.54, 97.56],
            "A100": [1033.05, 11521.97, 123.93],
        },
        "T5 Large": {
            "T4": [1114.22, 16433.31, 424.91],
            "3090": [647.61, 7184.71, 160.97],
            "A100": [1668.73, 19962.78, 200.75],
        },
        "T5 3B": {
            "T4": [2282.56, 20891.22, 2196.02],
            "3090": [1011.32, 9735.97, 734.40],
            "A100": [2769.64, 26440.65, 612.98],
        },
    },
    "Beam Search": {
        "DistilGPT2": {
            "T4": [2407.89, 19442.60, 3313.92],
            "3090": [998.52, 8286.03, 900.28],
            "A100": [2237.41, 21771.40, 760.47],
        },
        "GPT2": {
            "T4": [3767.43, 34813.93, 5559.42],
            "3090": [1633.04, 14606.93, 1533.55],
            "A100": [3705.43, 34586.23, 1295.87],
        },
        "OPT-1.3B": {
            "T4": [16649.82, 78500.33, 21894.31],
            # NOTE(review): the PyTorch value was previously written as 508518 (no decimal
            # point, ~100x its neighbours). Treated as a dropped decimal point and corrected
            # to 5085.18 -- verify against the raw benchmark results.
            "3090": [5085.18, 32822.81, 5762.46],
            "A100": [5967.32, 78334.56, 4096.38],
        },
        "GPTJ-6B": {
            "T4": [0, 0, 0],
            "3090": [0, 0, 0],
            "A100": [15119.10, 134000.40, 10214.17],
        },
        "T5 Small": {
            "T4": [283.64, 25089.12, 1391.66],
            "3090": [137.38, 10680.28, 486.96],
            "A100": [329.28, 24747.38, 513.99],
        },
        "T5 Base": {
            "T4": [1383.21, 44809.14, 3920.40],
            "3090": [723.11, 18657.48, 1258.60],
            "A100": [2360.85, 45085.07, 1107.58],
        },
        "T5 Large": {
            "T4": [1663.50, 81902.41, 9551.29],
            "3090": [922.53, 35524.30, 2838.86],
            "A100": [2168.22, 86890.00, 2373.04],
        },
        "T5 3B": {
            "T4": [0, 0, 0],
            "3090": [1521.05, 35337.30, 8282.09],
            "A100": [3162.54, 88453.65, 5585.20],
        },
    },
}
# Every plot is written to this single file; each call overwrites the previous image.
FIGURE_PATH = "plt.png"
FIG_DPI = 300


def get_plot(model_name, plot_eager, generate_type):
    """Render the benchmark bar chart for one model and return the image path.

    Args:
        model_name: Model key inside ``BENCHMARK_DATA[generate_type]``.
        plot_eager: ``"No"`` removes the "TF (Eager Execution)" bars; any other
            value keeps them.
        generate_type: Top-level key of ``BENCHMARK_DATA``.

    Returns:
        ``FIGURE_PATH``, the file the chart was saved to.

    Raises:
        KeyError: If ``generate_type`` or ``model_name`` is not in ``BENCHMARK_DATA``.
    """
    # Columns are the GPUs, rows are the three frameworks (in BENCHMARK_DATA order).
    df = pd.DataFrame(BENCHMARK_DATA[generate_type][model_name])
    df["framework"] = ["PyTorch", "TF (Eager Execution)", "TF (XLA)"]
    # Long format: one row per (framework, GPU) pair, as expected by catplot.
    df = pd.melt(df, id_vars=["framework"], value_vars=["T4", "3090", "A100"])
    if plot_eager == "No":
        df = df[df["framework"] != "TF (Eager Execution)"]

    grid = sns.catplot(
        data=df,
        kind="bar",
        x="variable",
        y="value",
        hue="framework",
        palette={"PyTorch": "blue", "TF (Eager Execution)": "orange", "TF (XLA)": "red"},
        alpha=.9,
    )
    ax = grid.facet_axis(0, 0)
    fig = ax.figure
    try:
        grid.despine(left=True)
        grid.set_axis_labels("GPU", "Generation time (ms)")
        grid.legend.set_title("Framework")
        # Add the number to the top of each bar
        for container in ax.containers:
            ax.bar_label(container)
        # Save through the figure object rather than pyplot's global "current figure",
        # so the image written is always the one built by this call.
        fig.savefig(FIGURE_PATH, dpi=FIG_DPI)
    finally:
        # pyplot keeps every figure alive until it is closed; without this each
        # callback invocation would leak one figure.
        plt.close(fig)
    return FIGURE_PATH
# Models offered in every generation tab, and the one selected when the app starts.
_MODEL_CHOICES = ("DistilGPT2", "GPT2", "OPT-1.3B", "GPTJ-6B", "T5 Small", "T5 Base", "T5 Large", "T5 3B")
_DEFAULT_MODEL = "T5 Small"

# One entry per generation tab: (tab label, which is also the BENCHMARK_DATA key,
# markdown describing the benchmark parameters shown under the controls).
_GENERATION_TABS = (
    (
        "Greedy Decoding",
        """
        ### Greedy Decoding benchmark parameters
        - `max_new_tokens = 64`;
        - `pad_to_multiple_of = 64` for Tensorflow XLA models. Others do not pad (input prompts between 2 and 33 tokens).
        """,
    ),
    (
        "Sampling",
        """
        ### Sampling benchmark parameters
        - `max_new_tokens = 128`;
        - `temperature = 2.0`;
        - `top_k = 50`;
        - `pad_to_multiple_of = 64` for Tensorflow XLA models. Others do not pad (input prompts between 2 and 33 tokens).
        """,
    ),
    (
        "Beam Search",
        """
        ### Beam Search benchmark parameters
        - `max_new_tokens = 256`;
        - `num_beams = 16`;
        - `pad_to_multiple_of = 64` for Tensorflow XLA models. Others do not pad (input prompts between 2 and 33 tokens).
        """,
    ),
)


def _add_generation_tab(generate_type, parameters_markdown):
    """Add one benchmark tab to the enclosing ``gr.Tabs`` context.

    Must be called inside an active ``gr.Blocks`` / ``gr.Tabs`` context.

    Args:
        generate_type: Tab label; also passed to ``get_plot`` as ``generate_type``.
        parameters_markdown: Markdown text listing the benchmark parameters.
    """
    plot_fn = functools.partial(get_plot, generate_type=generate_type)
    with gr.TabItem(generate_type):
        with gr.Row():
            with gr.Column():
                model_selector = gr.Dropdown(
                    choices=list(_MODEL_CHOICES),
                    value=_DEFAULT_MODEL,
                    label="Model",
                    interactive=True,
                )
                eager_enabler = gr.Radio(
                    ["Yes", "No"],
                    value="Yes",
                    label="Plot TF Eager Execution?",
                    interactive=True,
                )
                gr.Markdown(parameters_markdown)
            # Show plot when the gradio app is initialized
            plot = gr.Image(value=plot_fn(_DEFAULT_MODEL, "Yes"))
        # Either control changing re-renders the plot from both current values.
        controls = [model_selector, eager_enabler]
        model_selector.change(fn=plot_fn, inputs=controls, outputs=plot)
        eager_enabler.change(fn=plot_fn, inputs=controls, outputs=plot)


demo = gr.Blocks()

with demo:
    gr.Markdown(
        """
        # TensorFlow XLA Text Generation Benchmark
        Instructions:
        1. Pick a tab for the type of generation (or for benchmark information);
        2. Select a model from the dropdown menu;
        3. Optionally omit results from TensorFlow Eager Execution, if you wish to better compare the performance of
        PyTorch to TensorFlow with XLA.
        """
    )
    with gr.Tabs():
        for _generate_type, _parameters_markdown in _GENERATION_TABS:
            _add_generation_tab(_generate_type, _parameters_markdown)

        with gr.TabItem("Benchmark Information"):
            gr.Dataframe(
                headers=["Parameter", "Value"],
                value=[
                    ["Transformers Version", "4.21"],
                    ["TensorFlow Version", "2.9.1"],
                    ["Pytorch Version", "1.11.0"],
                    ["OS", "22.04 LTS (3090) / Debian 10 (other GPUs)"],
                    ["CUDA", "11.6 (3090) / 11.3 (others GPUs)"],
                    ["Number of Runs", "100 (the first run was discarded to ignore compilation time)"],
                    ["Is there code to reproduce?", "Yes -- https://gist.github.com/gante/f0017e3f13ac11b0c02e4e4db351f52f"],
                ],
            )

demo.launch()