# Spaces: Running  (web-page header captured along with the file; not part of the program)
import os

import gradio as gr

from src.control_panel import create_control_panel, create_control_callback
from src.latency_score_memory import create_lat_score_mem_plot
from src.quantization_kernels import create_quant_plots
from src.leaderboard import create_leaderboard_table
from src.bettertransformer import create_bt_plots
from src.flashattentionv2 import create_fa2_plots
from src.llm_perf import get_llm_perf_df
from src.assets import custom_css
from src.content import (
    LOGO,
    TITLE,
    ABOUT,
    INTRODUCTION,
    EXAMPLE_CONFIG,
    CITATION_BUTTON,
    CITATION_BUTTON_LABEL,
)
# Maps each benchmark machine name to the label displayed on its hardware tab.
# NOTE(review): the trailing characters of both labels look like mis-encoded
# emoji; they are kept byte-for-byte here — confirm against the upstream file
# before restoring them.
MACHINE_TO_HARDWARE = {
    "hf-dgx-01": "A100-80GB-275W π₯οΈ",
    "audace": "RTX4090-24GB-450W π»",
}

# Read from the environment; None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Build the whole UI at import time: a header, one tab per machine listed in
# MACHINE_TO_HARDWARE (each with its own control panel, result subtabs and
# filter callback), an "About" tab, and a collapsible citation box.
# NOTE(review): several labels below end in what looks like mis-encoded emoji;
# the strings are kept byte-for-byte — confirm against the upstream file.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")

    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` rather than `id`, so the builtin `id` is not shadowed.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                # Each hardware tab gets its own, independent set of filter widgets.
                (
                    filter_button,
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()

                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # Data for this machine; shared by every subtab below.
                    llm_perf_df = get_llm_perf_df(machine=machine)

                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard π ", id=0):
                        leaderboard_table = create_leaderboard_table(llm_perf_df)
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)

                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("BetterTransformer π", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)

                    ####################### FLASHATTENTIONV2 SPEEDUP TAB #######################
                    with gr.TabItem("FlashAttentionV2 π", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)

                    ####################### QUANTIZATION KERNELS SPEEDUP TAB #######################
                    with gr.TabItem("Custom Quantization Kernels π", id=4):
                        quant_prefill_plot, quant_decode_plot = create_quant_plots(llm_perf_df)

                ####################### CONTROL CALLBACK #######################
                # Called once per hardware tab, while the names above still refer
                # to this tab's components (they are rebound on the next iteration).
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                    quant_prefill_plot,
                    quant_decode_plot,
                )

        ####################### ABOUT TAB #######################
        # NOTE(review): hardware tabs use ids 0..len(MACHINE_TO_HARDWARE)-1, so this
        # hard-coded id would collide with a hardware tab if a fourth machine is added.
        with gr.TabItem("About π", id=3):
            gr.HTML(ABOUT, elem_classes="descriptive-text")
            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")

    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )
# Script entry point: nothing is launched when the module is merely imported.
if __name__ == "__main__":
    # Enable the request queue on the app, then start serving it.
    demo.queue().launch()