alfraser committed
Commit ab87be2 · 1 Parent(s): c0f0676

Added a test runner page which allows you to run a batch of tests from the UI

Files changed (1)
  1. pages/030_Test_Runner.py +45 -0
pages/030_Test_Runner.py ADDED
@@ -0,0 +1,45 @@
+import streamlit as st
+
+from src.architectures import *
+from src.common import generate_group_tag
+from src.testing import TestGenerator
+from src.st_helpers import st_setup
+
+
+if Architecture.architectures is None:
+    Architecture.load_architectures()
+
+if st_setup('LLM Arch'):
+    summary = st.container()
+    with summary:
+        st.write("# Test Runner")
+        st.write("## Run a new test")
+        st.write("### Comment:")
+        comment = st.text_input("Optional comment for the test")
+
+        st.write("### Architectures to include:")
+        selected_archs = st.multiselect(label="Architectures", options=[a.name for a in Architecture.architectures])
+
+        st.write("### Number of questions to ask:")
+        q_count = st.slider(label="Number of questions", min_value=1, max_value=TestGenerator.question_count(), step=1)
+
+        st.write("### Tag:")
+        tag = generate_group_tag()
+        st.write(f'Test will be tagged as "{tag}" - record this for easy searching later')
+
+        total_tests = len(selected_archs) * q_count
+        st.write("### Run:")
+        st.write(f"**{total_tests}** total tests will be run")
+        if st.button("**Run**", disabled=(total_tests==0)):
+            progress = st.progress(0.0, text="Running tests...")
+            questions = TestGenerator.get_random_questions(q_count)
+            num_complete = 0
+            for arch_name in selected_archs:
+                architecture = Architecture.get_architecture(arch_name)
+                for q in questions:
+                    architecture(ArchitectureRequest(q), trace_tags=[tag, "TestRunner"], trace_comment=comment)
+                    num_complete += 1
+                    if num_complete == total_tests:
+                        progress.empty()
+                    else:
+                        progress.progress(num_complete/total_tests, f"Run {num_complete} of {total_tests} tests...")
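Note: the page depends on two TestGenerator class methods that are not part of this commit: question_count() (used to bound the "Number of questions" slider) and get_random_questions(q_count) (used to build the batch). A minimal stand-in, inferred only from how the page calls them and not from the real src.testing implementation, might look like this:

    # Hypothetical stand-in for src.testing.TestGenerator, inferred from the calls
    # in pages/030_Test_Runner.py above; the real class is not part of this commit.
    import random


    class TestGenerator:
        # Placeholder question pool; where the real questions come from is not shown in this diff.
        _questions = [
            "Example question 1?",
            "Example question 2?",
            "Example question 3?",
        ]

        @classmethod
        def question_count(cls) -> int:
            # Upper bound for the "Number of questions" slider on the page
            return len(cls._questions)

        @classmethod
        def get_random_questions(cls, n: int) -> list[str]:
            # Sample without replacement so a single batch never repeats a question
            return random.sample(cls._questions, n)

With a stand-in like this, every selected architecture is asked the same sampled questions, so the generated tag groups len(selected_archs) * q_count traced runs.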