Spaces:
Runtime error
Runtime error
import pandas as pd | |
import plotly.express as px | |
import streamlit as st | |
from src.architectures import * | |
from src.st_helpers import st_setup | |
from src.testing import TestGroup | |
def show_stats(for_test_group: str):
    """
    Render the summary view for a single test group.

    Looks the group up with ``TestGroup.for_test_group_tag``, writes a heading
    (the group's comment, or a placeholder when the comment is an empty
    string) followed by a one-line count of tests and architectures, and then
    draws three charts from ``summary_stats_by_arch()``, each inside its own
    expander:

    * box plot of end-to-end elapsed time per architecture
    * box plot of response length per architecture
    * stacked bar chart of mean elapsed time per step, per architecture

    :param for_test_group: tag handed to ``TestGroup.for_test_group_tag``
    """
    test_group = TestGroup.for_test_group_tag(for_test_group)

    heading = test_group.comment if test_group.comment != "" else "No comment provided for group"
    st.write(f"### {heading}")
    st.write(f"Total of {test_group.num_tests} tests over {test_group.num_archs} architectures ({test_group.num_tests_per_arch} per architecture).")

    arch_stats = test_group.summary_stats_by_arch()

    with st.expander("**Elapsed End to End Time (seconds)**"):
        # One row per individual measurement so the box plot shows the spread.
        # Values are divided by 1000 to match the "seconds" label
        # (presumably the raw figures are milliseconds - confirm upstream).
        elapsed_rows = [
            [arch['arch_name'], elapsed / 1000]
            for arch in arch_stats
            for elapsed in arch['elapsed']
        ]
        elapsed_df = pd.DataFrame(elapsed_rows, columns=['Architecture', 'Elapsed time'])
        elapsed_fig = px.box(elapsed_df, x="Architecture", y="Elapsed time")
        elapsed_fig.update_xaxes(tickangle=-90)
        st.plotly_chart(elapsed_fig, use_container_width=True)

    with st.expander("**Average response length (count of characters)**"):
        # One row per recorded response length, unscaled.
        length_rows = [
            [arch['arch_name'], length]
            for arch in arch_stats
            for length in arch['response_len']
        ]
        length_df = pd.DataFrame(length_rows, columns=['Architecture', 'Response length'])
        length_fig = px.box(length_df, x="Architecture", y="Response length")
        length_fig.update_xaxes(tickangle=-90)
        st.plotly_chart(length_fig, use_container_width=True)

    with st.expander("**Mean elapsed time by architecture step (seconds)**"):
        # One row per (architecture, step); the steps are stacked within each
        # architecture's bar. Same /1000 scaling as the elapsed-time chart.
        step_rows = [
            [arch['arch_name'], step['step_name'], step['mean_elapsed'] / 1000]
            for arch in arch_stats
            for step in arch['steps']
        ]
        step_df = pd.DataFrame(step_rows, columns=['Architecture', 'Step', 'Mean elapsed time'])
        step_fig = px.bar(step_df, x='Architecture', y='Mean elapsed time', color='Step', barmode='stack')
        step_fig.update_xaxes(tickangle=-90)
        st.plotly_chart(step_fig, use_container_width=True)
# Page body: everything below runs only when st_setup returns a truthy value.
if st_setup('LLM Arch'):
    summary = st.container()
    with summary:
        st.write("# Test Reporter")
        # NOTE(review): nesting was reconstructed from a flattened source -
        # confirm the load/reload controls belong inside this container.
        TestGroup.load_all()
        reload_requested = st.button("Reload traces")
        if reload_requested:
            # Call load_all again with True, then restart the script run.
            TestGroup.load_all(True)
            st.rerun()

    # Left column holds the picker, right column shows the chosen group.
    selector, display = st.columns([2, 3])
    with selector:
        # Order by descending ``start`` (negated key keeps tie order ascending).
        test_groups = sorted(TestGroup.all.values(), key=lambda tg: -tg.start)
        options = [f'{tg.test_group}: {tg.comment}' for tg in test_groups]

        # index=None means nothing is pre-selected, so the radio may return None.
        selected = st.radio('**Pick a test set to review**', options=options, index=None)
        if selected:
            # The option label is "<tag>: <comment>"; the text before the
            # first colon is passed on as the group tag.
            chosen_tag = selected.partition(":")[0]
            with display:
                show_stats(chosen_tag)