Spaces:
Sleeping
Sleeping
""" | |
This module provides functionality for displaying and analyzing model benchmark results. | |
It includes functions for data processing, sorting, and a Gradio interface for user interaction. | |
""" | |
import logging | |
import re | |
import gradio as gr | |
import pandas as pd | |
from results import instance_type_mappings, results | |
# Configure the root logger at DEBUG level so that the warnings and exceptions
# logged by display_results (via logging.warning / logging.exception) are emitted.
logging.basicConfig(level=logging.DEBUG)
def get_model_names():
    """
    List the name of every model in the results data, in sorted order.

    Returns:
        list: Model names read from ``results["models"]``, sorted ascending.
    """
    names = [entry["name"] for entry in results["models"]]
    names.sort()
    return names
def get_models_by_architecture(model_name):
    """
    Collect every model whose ``modelType`` equals that of the named model.

    Args:
        model_name (str): Name of the model whose architecture is the reference.

    Returns:
        list: All models sharing the reference model's ``modelType`` (the
            reference model included). Empty when no model has that name.
    """
    # Locate the first model carrying the requested name.
    reference = None
    for candidate in results["models"]:
        if candidate["name"] == model_name:
            reference = candidate
            break

    if not reference:
        return []

    # A missing "modelType" is treated as the empty string on both sides.
    wanted_type = reference.get("modelType", "")
    same_architecture = []
    for candidate in results["models"]:
        if candidate.get("modelType", "") == wanted_type:
            same_architecture.append(candidate)
    return same_architecture
def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    Instance types of the form ``<family>.<size>`` (e.g. ``g5.2xlarge``,
    ``g4dn.xlarge``, ``p4d.24xlarge``) are split so that sizes sort by
    capacity rather than alphabetically.

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: A tuple used for sorting, containing (family, size_index).
            Sizes not listed in the known order get ``len(size_order)`` so they
            sort after the known ones. Strings that do not look like
            ``<family>.<size>`` fall back to ``(instance_type, 0)``.
    """
    size_order = (
        "xlarge",
        "2xlarge",
        "4xlarge",
        "8xlarge",
        "12xlarge",
        "16xlarge",
        "24xlarge",
        "48xlarge",
    )
    size_rank = {size: rank for rank, size in enumerate(size_order)}

    # The family is letters + generation digits + an optional suffix
    # (e.g. "dn" in g4dn, "e" in g6e, "d" in p4d). Without the suffix part
    # such families would not match and would be sorted by the raw string,
    # which puts "12xlarge" before "2xlarge".
    match = re.match(r"([a-z]+\d+[a-z\d-]*)\.(\w+)", instance_type)
    if match:
        family, size = match.groups()
        return (family, size_rank.get(size, len(size_order)))
    return (instance_type, 0)  # Fallback for non-standard instance types
def process_model_data(models):
    """Flatten the configurations of all given models into one list of rows."""
    rows = []
    # Walk every configuration of every model, in model order; a model
    # without a "configurations" key contributes nothing.
    all_configurations = (
        configuration
        for entry in models
        for configuration in entry.get("configurations", [])
    )
    for configuration in all_configurations:
        process_configuration(configuration, rows)
    return rows
def process_configuration(config, data):
    """Expand one configuration into result rows appended to ``data``.

    The instance details are looked up in ``instance_type_mappings``; values
    missing there are reported as "N/A". When the configuration carries its
    own nested "configurations" list, one row is appended per nested entry,
    otherwise a single row is appended for the configuration itself.
    """
    instance_type = config.get("instanceType", "N/A")
    mapping = instance_type_mappings.get(instance_type, {})

    # (row field, key in the mapping entry)
    field_sources = (("cloud", "cloud"), ("gpu", "gpu"), ("gpu_ram", "gpuRAM"))
    instance_data = {
        field: mapping.get(source, "N/A") for field, source in field_sources
    }
    instance_data["instance_type"] = instance_type

    entries = config["configurations"] if "configurations" in config else [config]
    for entry in entries:
        append_config_data(entry, instance_data, data)
def append_config_data(config, instance_data, data):
    """Build one result row and append it to ``data``.

    Instance columns come from ``instance_data``; benchmark columns come from
    ``config``, with defaults for any key that is absent.
    """
    # "container" is preferred; "tgi" is used when "container" is absent.
    container = config.get("container", config.get("tgi", "N/A"))

    row = {
        "Cloud": instance_data["cloud"],
        "Instance Type": instance_data["instance_type"],
        "GPU": instance_data["gpu"],
        "GPU RAM": instance_data["gpu_ram"],
    }
    row.update(
        {
            "Status": config.get("status", "N/A"),
            "Quantization": config.get("quantization", "N/A"),
            "Container": container,
            "Tokens per Second": config.get("tokensPerSecond", 0),
            "Notes": config.get("notes", ""),
        }
    )
    data.append(row)
def create_and_process_dataframe(data):
    """Build the results DataFrame, add a numeric CPI column and sort by it.

    The CPI column is placed immediately after "Tokens per Second" and the
    rows are ordered by CPI, highest first, with missing CPI values last.
    """
    frame = pd.DataFrame(data)

    # CPI is computed from the raw throughput values, before they are coerced.
    frame["CPI"] = frame.apply(calculate_cpi, axis=1)
    for numeric_column in ("CPI", "Tokens per Second"):
        frame[numeric_column] = pd.to_numeric(frame[numeric_column], errors="coerce")

    # Reposition CPI right after the throughput column.
    ordered = list(frame.columns)
    anchor = ordered.index("Tokens per Second")
    ordered.pop(ordered.index("CPI"))
    ordered.insert(anchor + 1, "CPI")

    reordered = frame[ordered]
    return reordered.sort_values("CPI", ascending=False, na_position="last")
def calculate_cpi(row):
    """Return tokens per second divided by the instance price for one row.

    ``pd.NA`` is returned when the throughput cannot be converted to a float,
    or when the throughput or the price is not strictly positive. An instance
    type without a "price" entry in ``instance_type_mappings`` counts as
    price 0.
    """
    mapping_entry = instance_type_mappings.get(row["Instance Type"], {})
    price = mapping_entry.get("price", 0)
    raw_throughput = row["Tokens per Second"]
    try:
        throughput = float(raw_throughput)
        usable = throughput > 0 and price > 0
        return throughput / price if usable else pd.NA
    except (ValueError, TypeError):
        return pd.NA
def style_dataframe(df): | |
"""Apply styling to the DataFrame.""" | |
def color_status(val): | |
if val == "OK": | |
return "background-color: green; color: white" | |
if val == "KO": | |
return "background-color: red; color: white" | |
return "" | |
return df.style.map(color_status, subset=["Status"]).format( | |
{"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A" | |
) | |
def display_results(model_name):
    """
    Process and display results for a given model, including CPI calculation.

    Results of all models sharing the selected model's architecture are
    merged into a single table. When more than one model contributes, a note
    listing their names (in sorted order) is appended to the text.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information, or a
              message when nothing was found or an error occurred.
            - Styled DataFrame with the results, including CPI; an empty
              ``pandas.DataFrame`` when there is nothing to show.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning("No models found for %s", model_name)
            return (
                f"No results found for the selected model: {model_name}",
                pd.DataFrame(),
            )

        model_type = models[0].get("modelType", "N/A")
        data = process_model_data(models)
        if not data:
            logging.warning("No data extracted for %s", model_name)
            return f"No data for the selected model: {model_name}", pd.DataFrame()

        # De-duplicate, then sort: iterating a set of strings directly gives
        # an order that can change from one run to the next.
        merged_models = sorted({model.get("name", "Unknown") for model in models})

        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if len(merged_models) > 1:
            merged_models_message = (
                f"Note: Results merged from models: {', '.join(merged_models)}"
            )
            result_text += f"\n\n{merged_models_message}"

        df = create_and_process_dataframe(data)
        styled_df = style_dataframe(df)
        return result_text, styled_df
    except (KeyError, ValueError, TypeError) as e:
        # Report the failure in the UI instead of propagating it to Gradio.
        logging.exception("Error in display_results: %s", e)
        return f"An error occurred for {model_name}: {str(e)}", pd.DataFrame()
with gr.Blocks() as demo:
    # Page header and explanatory text.
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown(
        """This table shows the benchmark results for each model. \n\n
Configurations are default unless noted.\n
[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html),
[SGLang](https://github.com/sgl-project/sglang),
[Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n
CPI means cost-performance index and is calculated as tokens per second / instance price."""
    )
    # Model selector and the two outputs filled in by display_results.
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")
    # Refresh the text and the table whenever the selected model changes.
    model_dropdown.change(
        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
    )

if __name__ == "__main__":
    demo.launch()