Spaces:

optimum
/

auto-benchmark

Sleeping

File size: 5,973 Bytes

d1e3b68

import random
import subprocess
import gradio as gr
from ansi2html import Ansi2HTMLConverter
from optimum_benchmark.task_utils import (
    TASKS_TO_AUTOMODELS,
    infer_task_from_model_name_or_path,
)


def get_backend_config():
    """Create the widgets for the backend options shared by every backend.

    The components are instantiated in the order they are returned. Each
    label doubles as the override key that ``run_experiment`` emits for
    the widget's value.

    Returns:
        A list with three textboxes (seed and the two thread counts)
        followed by three checkboxes (the isolation checks and cache
        deletion).
    """
    seed = gr.Textbox(value=42, label="backend.seed")

    # Thread counts start out empty.
    inter_op_threads = gr.Textbox(
        placeholder=None,
        value=None,
        label="backend.inter_op_num_threads",
    )
    intra_op_threads = gr.Textbox(
        placeholder=None,
        value=None,
        label="backend.intra_op_num_threads",
    )

    # Both isolation checks are ticked initially. The "continous" spelling
    # is part of the emitted key and must stay exactly as is.
    initial_isolation = gr.Checkbox(
        value=True,
        label="backend.initial_isolation_check",
    )
    continuous_isolation = gr.Checkbox(
        value=True,
        label="backend.continous_isolation_check",
    )

    delete_cache = gr.Checkbox(value=False, label="backend.delete_cache")

    return [
        seed,
        inter_op_threads,
        intra_op_threads,
        initial_isolation,
        continuous_isolation,
        delete_cache,
    ]


def get_inference_config():
    """Create the widgets for the inference benchmark options.

    Returns:
        A two-element list: the textbox labelled ``benchmark.duration``
        (initial value 10) followed by the textbox labelled
        ``benchmark.warmup_runs`` (initial value 1).
    """
    duration = gr.Textbox(value=10, label="benchmark.duration")
    warmup_runs = gr.Textbox(value=1, label="benchmark.warmup_runs")
    return [duration, warmup_runs]


def get_pytorch_config():
    """Create the widgets for the PyTorch-specific backend options.

    None of the widgets is given an initial value. The components are
    instantiated in the order they are returned, and each label doubles
    as the override key that ``run_experiment`` emits for the widget's
    value.

    Returns:
        A list of twelve components (checkboxes, dropdowns and one
        textbox).
    """
    no_weights = gr.Checkbox(label="backend.no_weights")
    device_map = gr.Dropdown(
        label="backend.device_map",
        choices=["auto", "sequential"],
    )
    torch_dtype = gr.Dropdown(
        label="backend.torch_dtype",
        choices=["bfloat16", "float16", "float32", "auto"],
    )
    disable_grad = gr.Checkbox(label="backend.disable_grad")
    eval_mode = gr.Checkbox(label="backend.eval_mode")

    # Automatic mixed precision: on/off switch plus the dtype to use.
    amp_autocast = gr.Checkbox(label="backend.amp_autocast")
    amp_dtype = gr.Dropdown(
        label="backend.amp_dtype",
        choices=["bfloat16", "float16"],
    )

    torch_compile = gr.Checkbox(label="backend.torch_compile")
    bettertransformer = gr.Checkbox(label="backend.bettertransformer")
    quantization_scheme = gr.Dropdown(
        label="backend.quantization_scheme",
        choices=["gptq", "bnb"],
    )
    use_ddp = gr.Checkbox(label="backend.use_ddp")
    peft_strategy = gr.Textbox(label="backend.peft_strategy")

    return [
        no_weights,
        device_map,
        torch_dtype,
        disable_grad,
        eval_mode,
        amp_autocast,
        amp_dtype,
        torch_compile,
        bettertransformer,
        quantization_scheme,
        use_ddp,
        peft_strategy,
    ]


# Module-level converter shared by every run_experiment call; it turns the
# ANSI-colored text captured from the benchmark subprocess into HTML.
conv = Ansi2HTMLConverter()


def _parse_css_rules(html_text):
    """Return ``{class_name: declarations}`` parsed from the HTML's style block.

    Only the first ``<style type="text/css">`` element is read, and it is
    expected to hold one ``selector { declarations }`` rule per line.
    Lines that do not contain a ``{`` are skipped, and a document without
    a style block yields an empty dict.
    """
    _, opening, remainder = html_text.partition('<style type="text/css">')
    if not opening:
        return {}
    css, _, _ = remainder.partition("</style>")

    rules = {}
    for line in css.split("\n"):
        selector, brace, body = line.partition("{")
        if not brace:
            # Blank line or anything that is not a "selector { ... }" rule.
            continue
        class_name = selector.replace(".", "").strip()
        rules[class_name] = body.split("}")[0].strip()
    return rules


def _inline_css_classes(html_text, rules):
    """Swap ``class="..."`` attributes for equivalent inline ``style="..."``.

    Every class named in an attribute is looked up in ``rules`` and the
    declarations found are concatenated, so attributes that list several
    classes are inlined as well. Attributes whose classes are all unknown
    are left untouched.
    """
    import re

    def to_style(match):
        declarations = [
            rules[name].rstrip("; ")
            for name in match.group(1).split()
            if rules.get(name)
        ]
        if not declarations:
            return match.group(0)
        return 'style="{};"'.format("; ".join(declarations))

    # A replacement function (not a template string) keeps backslashes in
    # the CSS from being interpreted as regex escapes.
    return re.sub(r'class="([^"]*)"', to_style, html_text)


def run_experiment(kwargs):
    """Run ``optimum-benchmark`` and stream its log output as HTML.

    Args:
        kwargs: Mapping of Gradio component to its current value, as
            supplied by the click handler. Each entry becomes a
            ``<component.label>=<value>`` command-line override. Empty
            strings and ``None`` are both sent as ``null``.

    Yields:
        After every line printed by the subprocess, the HTML rendering of
        all output captured so far, with CSS classes replaced by inline
        styles.

    Returns:
        The last HTML document yielded, or ``""`` when the subprocess
        printed nothing.
    """
    arguments = [
        "optimum-benchmark",
        "--config-dir",
        "./",
        "--config-name",
        "base_config",
    ]
    for component, value in kwargs.items():
        # An empty textbox ("") and an unset widget (None) both mean
        # "not provided"; formatting None directly would send the literal
        # string "None" instead of a null value.
        rendered = "null" if value is None or value == "" else value
        arguments.append(f"{component.label}={rendered}")

    # stream subprocess output (stderr is merged into stdout)
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )

    ansi_text = ""
    # Bound up front so the final return is valid even with no output.
    html_text = ""
    try:
        for ansi_line in iter(process.stdout.readline, ""):
            # mirror the process output on the server console
            print(ansi_line, end="")
            ansi_text += ansi_line
            # The whole transcript is converted again on every line, so
            # each yielded document contains all output captured so far.
            html_text = conv.convert(ansi_text)
            html_text = _inline_css_classes(
                html_text, _parse_css_rules(html_text)
            )
            yield html_text
    except BaseException:
        # The consumer went away (GeneratorExit) or something failed:
        # stop the benchmark instead of leaving it running unattended.
        process.terminate()
        raise
    finally:
        # Always release the pipe and reap the child process.
        process.stdout.close()
        process.wait()

    return html_text


# Declarative UI layout: components appear in the order they are created,
# so the statement order below is significant.
with gr.Blocks() as demo:
    # Page heading followed by a one-line description with a project link.
    gr.HTML("<h1 style='text-align: center'>🤗 Optimum Benchmark 🏋️</h1>")
    gr.Markdown(
        "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git)."
    )

    # Top-level experiment settings.
    model = gr.Textbox(
        value="bert-base-uncased",
        label="model",
    )
    task = gr.Dropdown(
        choices=[*TASKS_TO_AUTOMODELS.keys()],
        value="text-classification",
        label="task",
    )
    device = gr.Dropdown(
        label="device",
        choices=["cpu", "cuda"],
        value="cpu",
    )
    # A random 16-bit suffix is drawn once, when the UI is built.
    expetiment_name = gr.Textbox(
        value="experiment_" + str(random.getrandbits(16)),
        label="experiment_name",
    )

    # Submitting the model textbox fills in the task dropdown.
    model.submit(
        fn=infer_task_from_model_name_or_path,
        inputs=[model],
        outputs=[task],
    )

    # Backend row: selector on the left, collapsed options on the right.
    with gr.Row():
        with gr.Column(variant="panel"):
            backend = gr.Dropdown(
                choices=["pytorch", "onnxruntime", "openvino", "neural-compressor"],
                value="pytorch",
                label="backend",
                container=True,
            )

        with gr.Column(variant="panel"), gr.Accordion(
            open=False, label="Backend Config"
        ):
            backend_config = [*get_backend_config(), *get_pytorch_config()]

    # Benchmark row: selector on the left, collapsed options on the right.
    with gr.Row():
        with gr.Column(variant="panel"):
            benchmark = gr.Dropdown(
                label="benchmark",
                choices=["inference", "training"],
                value="inference",
                container=True,
            )

        with gr.Column(variant="panel"), gr.Accordion(
            open=False, label="Benchmark Config"
        ):
            benchmark_config = get_inference_config()

    # Trigger button and the panel that displays the streamed logs.
    run_benchmark = gr.Button(variant="primary", value="Run Benchmark")
    with gr.Accordion(open=True, label="Logs:"):
        logs = gr.HTML()

    # The inputs are given as a set of components; run_experiment reads
    # them back as a component -> value mapping.
    run_benchmark.click(
        fn=run_experiment,
        queue=True,
        outputs=[logs],
        inputs={
            expetiment_name,
            model,
            task,
            device,
            backend,
            benchmark,
            *backend_config,
            *benchmark_config,
        },
    )


# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # Turn on the app's queue, then launch it.
    demo.queue().launch()