File size: 4,472 Bytes
0227006 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import math
import numpy as np
import pandas as pd
import plotly.express as px
# 1
def compute_pairwise_win_fraction(battles):
# Times each model wins as Model A
a_win_ptbl = pd.pivot_table(
battles[battles["win"] == "model_a"],
index="model_a",
columns="model_b",
aggfunc="size",
fill_value=0,
)
# Table counting times each model wins as Model B
b_win_ptbl = pd.pivot_table(
battles[battles["win"] == "model_b"],
index="model_a",
columns="model_b",
aggfunc="size",
fill_value=0,
)
# Table counting number of A-B pairs
num_battles_ptbl = pd.pivot_table(battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0)
# Computing the proportion of wins for each model as A and as B
# against all other models
row_beats_col_freq = (a_win_ptbl + b_win_ptbl.T) / (num_battles_ptbl + num_battles_ptbl.T)
# Arrange ordering according to proprition of wins
prop_wins = row_beats_col_freq.mean(axis=1).sort_values(ascending=False)
model_names = list(prop_wins.keys())
row_beats_col = row_beats_col_freq.loc[model_names, model_names]
return row_beats_col
def visualize_pairwise_win_fraction(battles, title):
row_beats_col = compute_pairwise_win_fraction(battles)
fig = px.imshow(row_beats_col, color_continuous_scale="RdBu", text_auto=".2f", title=title)
fig.update_layout(
xaxis_title="Model B",
yaxis_title="Model A",
xaxis_side="top",
title_y=0.07,
title_x=0.5,
)
fig.update_traces(hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Fraction of A Wins: %{z}<extra></extra>")
return fig
# 2
def switch_model_a_b(df):
df_switch = df.copy()
# switch with probability 0.5
for i, row in df.iterrows():
if np.random.rand() < 0.5:
df_switch.at[i, "model_a"] = row["model_b"]
df_switch.at[i, "model_b"] = row["model_a"]
if row["win"] == "model_a":
df_switch.at[i, "win"] = "model_b"
elif row["win"] == "model_b":
df_switch.at[i, "win"] = "model_a"
return df_switch
def visualize_battle_count(battles, title):
ptbl = pd.pivot_table(battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0)
battle_counts = ptbl + ptbl.T
ordering = battle_counts.sum().sort_values(ascending=False).index
fig = px.imshow(battle_counts.loc[ordering, ordering], title=title, text_auto=True, width=600)
fig.update_layout(
xaxis_title="Model B",
yaxis_title="Model A",
xaxis_side="top",
title_y=0.07,
title_x=0.5,
)
fig.update_traces(hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Count: %{z}<extra></extra>")
return fig
# 3
def get_bootstrap_result(battles, func_compute_elo, num_round):
rows = [func_compute_elo(battles.sample(frac=1.0, replace=True)) for _ in range(num_round)]
df = pd.DataFrame(rows)
return df[df.median().sort_values(ascending=False).index]
def visualize_bootstrap_scores(df, title):
bars = (
pd.DataFrame(
dict(
lower=df.quantile(0.025),
rating=df.quantile(0.5),
upper=df.quantile(0.975),
)
)
.reset_index(names="model")
.sort_values("rating", ascending=False)
)
bars["error_y"] = bars["upper"] - bars["rating"]
bars["error_y_minus"] = bars["rating"] - bars["lower"]
bars["rating_rounded"] = np.round(bars["rating"], 2)
fig = px.scatter(
bars,
x="model",
y="rating",
error_y="error_y",
error_y_minus="error_y_minus",
text="rating_rounded",
title=title,
)
fig.update_layout(xaxis_title="Model", yaxis_title="Rating")
return fig
# 4
def visualize_rating_count(df, title):
df_all_value_counts = pd.concat([df["model_a"], df["model_b"]]).value_counts()
fig = px.bar(df_all_value_counts, title=title, text_auto=True)
min_y = df_all_value_counts.min()
max_y = df_all_value_counts.max()
y_end = math.ceil(min_y / 100) * 100
y_begin = math.floor(max_y / 100) * 100
fig.update_layout(xaxis_title="model", yaxis_title="Rating Count", showlegend=False)
fig.update_yaxes(range=[y_begin, y_end])
# save the plot for the blog:
fig.write_html("model_counts.html", full_html=False, include_plotlyjs="cdn")
return fig
|