"""Display and analyze model benchmark results.

The module reads benchmark data from ``results.results``, flattens the
per-model configurations into table rows, sorts them by instance type and
exposes them through a small Gradio interface.
"""

import logging
import re

import gradio as gr
import pandas as pd

from results import results

logging.basicConfig(level=logging.DEBUG)

# Known instance sizes, smallest first. Sizes that are not listed here sort
# after all listed sizes within the same family.
_SIZE_ORDER = (
    "xlarge",
    "2xlarge",
    "4xlarge",
    "8xlarge",
    "12xlarge",
    "16xlarge",
    "24xlarge",
    "48xlarge",
)

# "<family>.<size>", where the family is letters + generation digits plus an
# optional suffix (e.g. "g5", "g4dn", "p4d", "trn1n"). The optional suffix is
# what lets suffixed families be parsed instead of falling back to plain
# string ordering.
_INSTANCE_TYPE_RE = re.compile(r"([a-z]+\d+[a-z0-9-]*)\.(\w+)")


def get_model_names() -> list:
    """Return the names of all models in the results data, sorted.

    Returns:
        list: Model names in ascending order.
    """
    return sorted(model["name"] for model in results["models"])


def get_models_by_architecture(model_name: str) -> list:
    """Return every model sharing the architecture of ``model_name``.

    The architecture is read from the ``modelType`` key; a missing key is
    treated as the empty string, so models without ``modelType`` are grouped
    together.

    Args:
        model_name (str): Name of the model whose architecture is matched.

    Returns:
        list: Model entries with the same ``modelType`` (including the
        selected model itself), or an empty list if ``model_name`` is not
        present in the results data.
    """
    selected_model = next(
        (m for m in results["models"] if m["name"] == model_name), None
    )
    if selected_model is None:
        return []

    model_type = selected_model.get("modelType", "")
    return [m for m in results["models"] if m.get("modelType", "") == model_type]


def custom_sort_key(instance_type: str) -> tuple:
    """Build a sort key that orders instance types by family, then by size.

    Args:
        instance_type (str): Instance type such as ``"g5.12xlarge"`` or
            ``"g4dn.2xlarge"``.

    Returns:
        tuple: ``(family, size_index)`` where ``size_index`` is the position
        of the size in the known size list (or the list length for unknown
        sizes). Strings that do not look like ``family.size`` yield
        ``(instance_type, 0)``.
    """
    match = _INSTANCE_TYPE_RE.match(instance_type)
    if match is None:
        # Fallback for non-standard instance types.
        return (instance_type, 0)

    family, size = match.groups()
    size_index = _SIZE_ORDER.index(size) if size in _SIZE_ORDER else len(_SIZE_ORDER)
    return (family, size_index)


def _config_rows(config):
    """Yield one table row per benchmark run described by ``config``.

    A configuration either describes a single run directly, or carries a
    nested ``"configurations"`` list with several runs on the same hardware.
    In both cases the hardware columns come from the outer configuration.
    """
    hardware = {
        "Cloud": config.get("cloud", "N/A"),
        "Instance Type": config.get("instanceType", "N/A"),
        "GPU": config.get("gpu", "N/A"),
        "GPU RAM": config.get("gpuRAM", "N/A"),
    }
    runs = config["configurations"] if "configurations" in config else [config]
    for run in runs:
        yield {
            **hardware,
            "Status": run.get("status", "N/A"),
            "Quantization": run.get("quantization", "N/A"),
            # Some entries store the container under the "tgi" key instead.
            "Container": run.get("container", run.get("tgi", "N/A")),
            "Tokens per Second": run.get("tokensPerSecond", "N/A"),
            "Notes": run.get("notes", ""),
        }


def _color_status(val):
    """Return the CSS used to highlight a ``Status`` cell."""
    if val == "OK":
        return "background-color: green; color: white"
    if val == "KO":
        return "background-color: red; color: white"
    return ""


def display_results(model_name: str) -> tuple:
    """Collect and format the benchmark results for a model.

    Results of all models sharing the selected model's architecture are
    merged into a single table, sorted by instance type.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: ``(markdown_text, table)``. On success ``table`` is a pandas
        ``Styler`` with the ``Status`` column colour-coded; when nothing can
        be shown (unknown model, no data, or a handled error) it is an empty
        ``pandas.DataFrame`` and ``markdown_text`` explains why.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning("No models found for %s", model_name)
            return (
                f"No results found for the selected model: {model_name}",
                pd.DataFrame(),
            )

        model_type = models[0].get("modelType", "N/A")
        data = []
        merged_models = set()

        for model in models:
            merged_models.add(model.get("name", "Unknown"))
            for config in model.get("configurations", []):
                try:
                    for row in _config_rows(config):
                        data.append(row)
                except (KeyError, ValueError, TypeError) as e:
                    # Best effort: skip the malformed entry, keep the rest.
                    logging.error("Error processing configuration: %s", e)
                    continue

        if not data:
            logging.warning("No data extracted for %s", model_name)
            return (
                f"No data for the selected model: {model_name}",
                pd.DataFrame(),
            )

        sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))

        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if len(merged_models) > 1:
            # Sort the names so the note is stable across runs (set iteration
            # order is not).
            merged_names = ", ".join(sorted(merged_models))
            result_text += f"\n\nNote: Results merged from models: {merged_names}"

        df = pd.DataFrame(sorted_data)

        styler = df.style
        # Styler.applymap was renamed to Styler.map in pandas 2.1 and the old
        # name is deprecated; prefer the new name and fall back for older
        # pandas versions.
        apply_elementwise = getattr(styler, "map", None) or styler.applymap
        styled_df = apply_elementwise(_color_status, subset=["Status"])

        return result_text, styled_df

    except (KeyError, ValueError, TypeError) as e:
        logging.exception("Error in display_results: %s", e)
        return (
            f"An error for {model_name}: {str(e)}",
            pd.DataFrame(),
        )


with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    # NOTE(review): the sentence below has an unbalanced ")" after "etc." -
    # some words appear to be missing; confirm the intended wording.
    gr.Markdown(
        "This table shows the benchmark results for each model. \n "
        "[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher), "
        "[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html), "
        "etc.) are default unless noted."
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")

    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")

    # Refresh both outputs whenever a different model is selected.
    model_dropdown.change(
        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
    )

if __name__ == "__main__":
    demo.launch()