# Benchmarks / app.py
# Author: Julien Simon — initial version (commit 7200b01)
import logging
import re

import gradio as gr
import pandas as pd

# Project-local benchmark data: a dict with a 'models' list of model entries.
from results import results

# DEBUG level is verbose; consider INFO for production deployments.
logging.basicConfig(level=logging.DEBUG)
def get_model_names():
    """Return every model name in the results data, sorted alphabetically.

    Returns:
        list: Alphabetically sorted model names.
    """
    names = (entry['name'] for entry in results['models'])
    return sorted(names)
def get_models_by_architecture(model_name):
    """Collect every model sharing the architecture of the named model.

    Args:
        model_name (str): Model whose architecture ('modelType') is matched.

    Returns:
        list: All model entries with the same 'modelType' as the selected
            model; empty list if the name is unknown.
    """
    target = None
    for entry in results['models']:
        if entry['name'] == model_name:
            target = entry
            break
    if target is None:
        return []

    wanted_type = target.get('modelType', '')
    return [
        entry for entry in results['models']
        if entry.get('modelType', '') == wanted_type
    ]
def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    Instance types look like '<family>.<size>' (e.g. 'g5.2xlarge',
    'g4dn.xlarge'). The key groups rows by family, then orders sizes from
    smallest to largest; sizes not in the known list sort after them.

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: A tuple used for sorting, containing (family, size_index).
    """
    size_order = ['xlarge', '2xlarge', '4xlarge', '8xlarge', '12xlarge',
                  '16xlarge', '24xlarge', '48xlarge']
    # Split on the first dot instead of the previous regex
    # r'([a-z]+\d+)\.(\w+)': families with trailing letters such as
    # 'g4dn', 'p4d' or 'p3dn' did not match that pattern, fell through to
    # the fallback, and therefore sorted incorrectly among the others.
    family, sep, size = instance_type.partition('.')
    if sep:
        rank = size_order.index(size) if size in size_order else len(size_order)
        return (family, rank)
    return (instance_type, 0)  # Fallback for non-standard instance types
def _make_row(config, detail, cloud, instance_type):
    """Build one display row for the results table.

    `detail` holds the status/quantization/TGI fields: it is either the
    configuration itself (flat case) or one of its nested configurations.
    GPU fields always come from the outer `config`.
    """
    return {
        "Cloud": cloud,
        "Instance Type": instance_type,
        "GPU": config.get('gpu', 'N/A'),
        "GPU RAM": config.get('gpuRAM', 'N/A'),
        "Status": detail.get('status', 'N/A'),
        "Quantization": detail.get('quantization', 'N/A'),
        "TGI": detail.get('tgi', 'N/A'),
        "Tokens per Second": detail.get('tokensPerSecond', 'N/A'),
        "Notes": detail.get('notes', ''),
    }


def _fill_missing_fields(data):
    """Fill 'N/A' fields in place from other rows with the same instance type.

    Args:
        data (dict): Maps (instance_type, cloud[, quantization]) keys to row
            dicts; mutated in place.
    """
    for key, value in data.items():
        for field in value:
            if value[field] == 'N/A':
                # key[0] is the instance type for both flat and nested keys.
                for other_key, other_value in data.items():
                    if other_key[0] == key[0] and other_value[field] != 'N/A':
                        value[field] = other_value[field]
                        break


def display_results(model_name):
    """
    Process and display results for a given model.

    Retrieves all models sharing the selected model's architecture, flattens
    their (possibly nested) configurations into rows, merges 'N/A' fields
    from sibling rows for the same instance type, and sorts the rows by
    instance type.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information.
            - pandas Styler/DataFrame: Styled DataFrame with the results
              (plain empty DataFrame on error or when nothing is found).
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning(f"No models found for {model_name}")
            return f"No results found for the selected model: {model_name}", pd.DataFrame()

        model_type = models[0].get('modelType', 'N/A')
        data = {}
        merged_models = set()

        for model in models:
            merged_models.add(model.get('name', 'Unknown'))
            for config in model.get('configurations', []):
                try:
                    instance_type = config['instanceType']
                    cloud = config.get('cloud', 'N/A')
                    key = (instance_type, cloud)
                    if 'configurations' in config:
                        # Nested case: one row per quantization variant.
                        for nested in config['configurations']:
                            nested_key = key + (nested.get('quantization', 'N/A'),)
                            data[nested_key] = _make_row(config, nested, cloud, instance_type)
                    else:
                        data[key] = _make_row(config, config, cloud, instance_type)
                except KeyError as e:
                    logging.error(f"KeyError in config: {e}")
                    continue

        if not data:
            logging.warning(f"No data extracted for {model_name}")
            return f"No data could be extracted for the selected model: {model_name}", pd.DataFrame()

        # Merge data if there are conflicts
        _fill_missing_fields(data)

        # Filter out rows where Status is 'N/A'
        data = {k: v for k, v in data.items() if v['Status'] != 'N/A'}

        # sorted() makes the message deterministic; joining the bare set
        # produced an arbitrary order that could change between runs.
        merged_models_message = (
            f"Note: Results merged from models: {', '.join(sorted(merged_models))}"
            if len(merged_models) > 1 else None
        )

        # Sort the data by instance type
        sorted_rows = sorted(data.values(), key=lambda row: custom_sort_key(row['Instance Type']))

        # Named 'markdown' (not 'results') to avoid shadowing the
        # module-level `results` data imported at the top of the file.
        markdown = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if merged_models_message:
            markdown += f"\n\n{merged_models_message}"

        df = pd.DataFrame(sorted_rows)

        def color_status(val):
            # Green for passing runs, red for failing ones, default otherwise.
            if val == 'OK':
                return 'background-color: green; color: white'
            elif val == 'KO':
                return 'background-color: red; color: white'
            else:
                return ''

        # NOTE(review): Styler.applymap is deprecated in pandas >= 2.1 in
        # favor of Styler.map; kept for compatibility with older pandas.
        styled_df = df.style.applymap(color_status, subset=['Status'])
        return markdown, styled_df
    except Exception as e:
        logging.exception(f"Error in display_results: {e}")
        return f"An error occurred while processing results for {model_name}: {str(e)}", pd.DataFrame()
# Gradio UI: a dropdown of model names drives both the markdown summary
# and the results table below it.
with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown("This table shows the benchmark results for each model. [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher) and [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html) settings are default unless noted.")
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")
    # Re-render summary and table whenever the selection changes.
    model_dropdown.change(
        display_results,
        inputs=[model_dropdown],
        outputs=[results_text, results_output]
    )

if __name__ == "__main__":
    demo.launch()