File size: 2,706 Bytes
82130cb
 
5044033
 
82130cb
5044033
82130cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5044033
 
 
 
 
82130cb
5ea3cc9
 
 
 
82130cb
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
import plotly.express as px
import streamlit as st

from src.architectures import *
from src.st_helpers import st_setup
from src.testing import TestGroup

def _plot_in_expander(title, rows, columns, plot, **plot_kwargs):
    """Draw one chart inside a collapsible Streamlit expander.

    Args:
        title: Markdown label shown on the expander.
        rows: Row records used to build the DataFrame, one entry per row.
        columns: Column names for the DataFrame, in the same order as each row.
        plot: Plotly Express chart factory (e.g. ``px.box``); called with the
            DataFrame as its first argument.
        **plot_kwargs: Extra keyword arguments forwarded to ``plot``.
    """
    with st.expander(title):
        frame = pd.DataFrame(rows, columns=columns)
        fig = plot(frame, **plot_kwargs)
        # Vertical tick labels so the architecture names do not overlap.
        fig.update_xaxes(tickangle=-90)
        st.plotly_chart(fig, use_container_width=True)


def show_stats(for_test_group: str) -> None:
    """Render the summary charts for a single test group.

    Writes a heading and a one-line test count, then three expanders:
    a box plot of end-to-end elapsed time per architecture, a box plot of
    response length per architecture, and a stacked bar chart of mean
    elapsed time per architecture step.

    Args:
        for_test_group: Tag of the test group to display, as accepted by
            ``TestGroup.for_test_group_tag``.
    """
    test_group = TestGroup.for_test_group_tag(for_test_group)
    # Fall back to a placeholder for an empty (or missing) comment so the
    # heading is never blank.
    title = test_group.comment or "No comment provided for group"
    st.write(f"### {title}")
    st.write(
        f"Total of {test_group.num_tests} tests over {test_group.num_archs} architectures "
        f"({test_group.num_tests_per_arch} per architecture)."
    )

    stats = test_group.summary_stats_by_arch()

    # Elapsed values are divided by 1000 for display in seconds
    # (presumably recorded in milliseconds -- confirm against TestGroup).
    elapsed_rows = [
        (arch['arch_name'], elapsed / 1000)
        for arch in stats
        for elapsed in arch['elapsed']
    ]
    _plot_in_expander(
        "**Elapsed End to End Time (seconds)**",
        elapsed_rows,
        ['Architecture', 'Elapsed time'],
        px.box,
        x="Architecture",
        y="Elapsed time",
    )

    length_rows = [
        (arch['arch_name'], response_len)
        for arch in stats
        for response_len in arch['response_len']
    ]
    _plot_in_expander(
        "**Average response length (count of characters)**",
        length_rows,
        ['Architecture', 'Response length'],
        px.box,
        x="Architecture",
        y="Response length",
    )

    step_rows = [
        (arch['arch_name'], step['step_name'], step['mean_elapsed'] / 1000)
        for arch in stats
        for step in arch['steps']
    ]
    _plot_in_expander(
        "**Mean elapsed time by architecture step (seconds)**",
        step_rows,
        ['Architecture', 'Step', 'Mean elapsed time'],
        px.bar,
        x='Architecture',
        y='Mean elapsed time',
        color='Step',
        barmode='stack',
    )


# The page body is only rendered when st_setup returns a truthy value.
if st_setup('LLM Arch'):
    summary = st.container()
    with summary:
        st.write("# Test Reporter")
        TestGroup.load_all()
        if st.button("Reload traces"):
            # NOTE(review): the True argument presumably forces a fresh load
            # instead of reusing already-loaded data -- confirm in TestGroup.
            TestGroup.load_all(True)
            st.rerun()

        selector, display = st.columns([2, 3])

        with selector:
            # Largest `start` first (presumably most recent first).
            test_groups = sorted(
                TestGroup.all.values(),
                key=lambda tg: tg.start,
                reverse=True,
            )
            options = [f'{tg.test_group}: {tg.comment}' for tg in test_groups]
            # Map each radio label back to its tag instead of re-parsing the
            # label, so a tag that itself contains ":" is not truncated.
            tag_by_label = {
                label: tg.test_group for label, tg in zip(options, test_groups)
            }
            if selected := st.radio('**Pick a test set to review**', options=options, index=None):
                with display:
                    show_stats(tag_by_label[selected])