Spaces:
Running
Running
File size: 4,249 Bytes
c8763bd 134a499 c8763bd 134a499 ab5f5f1 dc685a9 ab5f5f1 2460b35 ab5f5f1 6f3a090 0f1bf97 ab5f5f1 0f1bf97 c8763bd efc3d5b 7406034 ab5f5f1 b3a1bf0 c8763bd 6f3a090 ab5f5f1 4b40065 ab5f5f1 dc685a9 ab5f5f1 dc685a9 ab5f5f1 dc685a9 2460b35 ab5f5f1 dc685a9 d19e350 ab5f5f1 d19e350 ab5f5f1 d19e350 134a499 d19e350 ab5f5f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import os
import gradio as gr
from src.control_panel import create_control_panel, create_control_callback
from src.latency_score_memory import create_lat_score_mem_plot
from src.quantization_kernels import create_quant_plots
from src.leaderboard import create_leaderboard_table
from src.bettertransformer import create_bt_plots
from src.flashattentionv2 import create_fa2_plots
from src.llm_perf import get_llm_perf_df
from src.assets import custom_css
from src.content import (
LOGO,
TITLE,
ABOUT,
INTRODUCTION,
EXAMPLE_CONFIG,
CITATION_BUTTON,
CITATION_BUTTON_LABEL,
)
# Maps each benchmark machine name to the label shown on its hardware tab.
# The machine name (key) is what gets passed to `get_llm_perf_df`; the label
# (value) is used as the tab title. Insertion order defines the tab order.
MACHINE_TO_HARDWARE = {
    "hf-dgx-01": "A100-80GB-275W 🖥️",
    "audace": "RTX4090-24GB-450W 💻",
}
# Hugging Face access token read from the environment; None when it is unset.
# NOTE(review): not referenced anywhere in the visible part of this file.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Build the whole UI inside a single Blocks context: header, one tab per
# benchmark machine (each with its own control panel, result sub-tabs and
# filter callback), an About tab, and a collapsible citation box.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")
    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` (rather than `id`) avoids shadowing the builtin.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()
                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # One dataframe per machine feeds every table and plot below.
                    llm_perf_df = get_llm_perf_df(machine=machine)
                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        leaderboard_table = create_leaderboard_table(llm_perf_df)
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)
                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("BetterTransformer 🚀", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)
                    ####################### FLASHATTENTIONV2 SPEEDUP TAB #######################
                    with gr.TabItem("FlashAttentionV2 🚀", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)
                    ####################### QUANTIZATION KERNELS SPEEDUP TAB #######################
                    with gr.TabItem("Custom Quantization Kernels 🚀", id=4):
                        quant_prefill_plot, quant_decode_plot = create_quant_plots(llm_perf_df)
                ####################### CONTROL CALLBACK #######################
                # Wire this machine's filter button to its own inputs and outputs;
                # must stay inside the loop so each tab binds its own components.
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                    quant_prefill_plot,
                    quant_decode_plot,
                )
        ####################### ABOUT TAB #######################
        # Sibling of the hardware tabs (ids 0..n-1 from enumerate), so id=3
        # does not clash with the sub-tab that also uses id=3.
        with gr.TabItem("About 📖", id=3):
            gr.HTML(ABOUT, elem_classes="descriptive-text")
            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")
    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )
if __name__ == "__main__":
    # Script entry point: turn on the queue, then start serving the app.
    queued_demo = demo.queue()
    queued_demo.launch()
|