AryaWu committed on
Commit
1a74ea4
·
verified ·
1 Parent(s): 3866edb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ import argparse
4
+ import pandas as pd
5
+ from functools import partial
6
+ import subprocess
7
+ """
8
+ Eventually add filters based on headers
9
+ """
10
# Columns shown in the first one-row table, with the matching Gradio
# Dataframe datatype for each column (same order as HEADERS).
HEADERS = ["__index_level_0__", "problem", "username", "entrypoint", "tests_passed", "total_tests"]
DATATYPES = ["number", "str", "str", "str", "number", "number"]

# Columns shown in the second one-row table; every one is rendered as a bool.
SUCCESS_HEADERS = ["is_success", "first_attempt", "is_first_success", "last_attempt", "is_last_success"]
# Derived from the header list so the two cannot drift apart.
SUCCESS_DATATYPES = ["bool"] * len(SUCCESS_HEADERS)
15
+
16
def capture_output(prompt, completion, prints, timeout=10):
    """Run prompt + completion + prints in a child interpreter and show the result.

    Args:
        prompt: Source text placed first in the program.
        completion: Source text placed after the prompt.
        prints: Source text appended last.
        timeout: Seconds to wait before the child process is killed.

    Returns:
        A ``(stderr, stdout)`` pair: a ``gr.Textbox`` holding the captured
        standard error (or a timeout message) and a ``gr.Code`` holding the
        captured standard output.
    """
    import sys

    # NOTE(review): the prefix and joiner whitespace are kept exactly as found;
    # confirm they match how completions in the dataset are indented.
    code = "\n".join([prompt, " " + " \n".join(completion.split("\n")), prints])

    # SECURITY: this executes dataset-provided code with the privileges of the
    # app process. Only point the viewer at datasets you trust.
    try:
        outputs = subprocess.run(
            # Use the interpreter running the app rather than whatever
            # "python" happens to resolve to on PATH.
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        # A non-terminating program must not block the UI worker forever.
        err_text = f"Timed out after {timeout} seconds"
        out_text = (exc.stdout or b"").decode("utf-8", errors="replace").strip()
    else:
        # errors="replace": arbitrary program output may not be valid UTF-8.
        err_text = outputs.stderr.decode("utf-8", errors="replace").strip()
        out_text = outputs.stdout.decode("utf-8", errors="replace").strip()

    stderr = gr.Textbox(err_text, label="Code Errors", type="text")
    stdout = gr.Code(out_text, label="Code Outputs", language="python")
    return stderr, stdout
22
+
23
def update_components(
    ds,
    slider,
    header_data,
    success_data,
    prompt,
    submitted_text,
    completion,
    assertions,
    prints,
    code_err,
    code_output,
):
    """Rebuild every display component for the row selected by ``slider``.

    Args:
        ds: The data to browse, either a pandas DataFrame or a ``gr.State``
            whose ``value`` is one.
        slider: Positional index of the row to show.
        header_data, success_data, prompt, submitted_text, completion,
        assertions, prints, code_err, code_output: Current component values;
            they are accepted to match the event wiring and are replaced by
            freshly built components.

    Returns:
        A list of ten components in the order slider, header table, success
        table, prompt, submitted text, completion, assertions, prints, code
        errors, code outputs.
    """
    frame = ds.value if isinstance(ds, gr.State) else ds
    # Double brackets keep the selection as a one-row DataFrame.
    selected = frame.iloc[[slider]]

    def one_row_table(columns, kinds):
        # Read-only, fixed-width table showing `columns` of the selected row.
        return gr.Dataframe(
            headers=columns,
            datatype=kinds,
            row_count=1,
            col_count=(len(columns), "fixed"),
            column_widths=["60px"] * len(columns),
            value=selected[columns],
            interactive=False,
        )

    header_data = one_row_table(HEADERS, DATATYPES)
    success_data = one_row_table(SUCCESS_HEADERS, SUCCESS_DATATYPES)

    record = selected.iloc[0]
    prompt = gr.Code(record["prompt"], language="python", label="Prompt")
    submitted_text = gr.Textbox(record["submitted_text"], type="text", label="Submitted Text")
    completion = gr.Code(record["completion"], language="python", label="Completion")
    assertions = gr.Code(record["assertions"], language="python", label="Assertions")
    prints = gr.Code(record["prints"], language="python", label="Prints")
    # The run results are reset to their placeholder text for the new row.
    code_err = gr.Textbox("__stderr__", label="Code Errors", type="text")
    code_output = gr.Code("__stdout__", language="python", label="Code Outputs")
    # The slider range follows the size of the data currently being browsed.
    slider = gr.Slider(
        0,
        len(frame) - 1,
        step=1,
        label="Problem ID (click and arrow keys to navigate):",
        value=slider,
    )
    return [
        slider,
        header_data,
        success_data,
        prompt,
        submitted_text,
        completion,
        assertions,
        prints,
        code_err,
        code_output,
    ]
68
+
69
def filter_by(
    dataset_name,
    dataset_split,
    fs_box, ls_box, ff_box, lf_box, f_box, l_box, is_success_box,
    problem_box,
    student_box,
    slider,
    *components_to_update,
):
    """Reload the dataset, apply the selected filters and show the first match.

    Args:
        dataset_name: Name passed to ``load_dataset``.
        dataset_split: Split passed to ``load_dataset``.
        fs_box, ls_box, ff_box, lf_box, f_box, l_box, is_success_box:
            Checkbox values; each checked box keeps only rows whose matching
            column equals the box value.
        problem_box: Selected problem name, or ``None`` for no problem filter.
        student_box: Selected username, or ``None`` for no username filter.
        slider: Current slider value; unused, the view restarts at row 0.
        *components_to_update: Remaining component values, forwarded to
            ``update_components``.

    Returns:
        A list whose first item is the filtered DataFrame (the new state
        value), followed by the components from ``update_components``.

    Raises:
        gr.Error: If no row matches the selected filters.
    """
    ds = load_dataset(dataset_name, split=dataset_split).to_pandas()

    # Column name -> checkbox value, in the same order as the UI checkboxes.
    flag_filters = {
        "is_first_success": fs_box,
        "is_last_success": ls_box,
        "is_first_failure": ff_box,
        "is_last_failure": lf_box,
        "first_attempt": f_box,
        "last_attempt": l_box,
        "is_success": is_success_box,
    }
    for label, box in flag_filters.items():
        # An unchecked box means "do not filter on this column".
        if box:
            ds = ds[ds[label] == box]

    if problem_box is not None:
        ds = ds[ds["problem"] == problem_box]

    if student_box is not None:
        ds = ds[ds["username"] == student_box]

    if ds.empty:
        # Without this guard the row lookup for index 0 fails with an
        # IndexError that tells the user nothing about the cause.
        raise gr.Error("No examples match the selected filters.")

    # Return the DataFrame itself as the new state value; it does not need to
    # be wrapped in a fresh gr.State.
    return [ds, *update_components(ds, 0, *components_to_update)]
94
+
95
def main(args):
    """Build the Gradio viewer for the requested dataset split and launch it.

    Args:
        args: Parsed command-line namespace; ``dataset``, ``split`` and
            ``share`` are read from it.
    """
    frame = load_dataset(args.dataset, split=args.split).to_pandas()
    callback = gr.SimpleCSVLogger()
    # Usernames are ordered by the integer left after removing "student".
    student_usernames = sorted(
        set(frame["username"]),
        key=lambda name: int(name.replace("student", "")),
    )
    problem_names = sorted(set(frame["problem"]))

    def empty_table(columns, kinds):
        # Read-only, fixed-width, one-row table with no initial value.
        return gr.Dataframe(
            headers=columns,
            datatype=kinds,
            row_count=1,
            col_count=(len(columns), "fixed"),
            column_widths=["60px"] * len(columns),
            interactive=False,
        )

    with gr.Blocks(theme="gradio/monochrome") as demo:
        dataset = gr.State(frame)
        # Row selector.
        slider = gr.Slider(
            0,
            len(frame) - 1,
            step=1,
            label="Problem ID (click and arrow keys to navigate):",
        )
        # Metadata tables for the selected row.
        header_data = empty_table(HEADERS, DATATYPES)
        success_data = empty_table(SUCCESS_HEADERS, SUCCESS_DATATYPES)

        prompt = gr.Code("__prompt__", language="python", label="Prompt")
        submitted_text = gr.Textbox("__submitted_text__", type="text", label="Submitted Text")
        completion = gr.Code("__completion__", language="python", label="Completion")

        with gr.Row():
            assertions = gr.Code("__assertions__", language="python", label="Assertions")
            prints = gr.Code("__prints__", language="python", label="Prints")

        runbtn = gr.Button("Run this code")

        with gr.Column():
            with gr.Row():
                code_output = gr.Code("__stdout__", language="python", label="Code Outputs")
                code_err = gr.Textbox("__stderr__", label="Code Errors", type="text")

        gr.Markdown("**Logging**\n")
        with gr.Column():
            with gr.Row():
                flagbtn = gr.Button("Flag this example to log file")
                flagout = gr.Textbox(label="Num examples in logfile")

        # Event wiring.
        # Run the displayed code and show its stderr/stdout.
        runbtn.click(
            fn=capture_output,
            inputs=[prompt, completion, prints],
            outputs=[code_err, code_output],
        )
        # Moving the slider refreshes every display component.
        components = [
            slider,
            header_data,
            success_data,
            prompt,
            submitted_text,
            completion,
            assertions,
            prints,
            code_err,
            code_output,
        ]
        slider.input(fn=update_components, inputs=[dataset, *components], outputs=components)
        # Flagging appends the current component values to the CSV log.
        callback.setup(components, "flagged_data_points")
        flagbtn.click(
            lambda *values: callback.flag(list(values)),
            components,
            flagout,
            preprocess=False,
            show_progress="full",
            trigger_mode="once",
        )

        # Filtering controls.
        # NOTE(review): the nesting of the widgets below was reconstructed;
        # confirm the intended layout.
        gr.Markdown("**Filtering (reload to clear all filters)**\n")
        with gr.Row():
            with gr.Column():
                fs_box = gr.Checkbox(label="is_first_success")
                ls_box = gr.Checkbox(label="is_last_success")
            with gr.Column():
                ff_box = gr.Checkbox(label="is_first_failure")
                lf_box = gr.Checkbox(label="is_last_failure")
            with gr.Column():
                f_box = gr.Checkbox(label="first_attempt")
                l_box = gr.Checkbox(label="last_attempt")
                is_success_box = gr.Checkbox(label="is_success")
        success_boxes = [fs_box, ls_box, ff_box, lf_box, f_box, l_box, is_success_box]
        problem_box = gr.Dropdown(label="problem", choices=problem_names)
        student_box = gr.Dropdown(label="username", choices=student_usernames)
        filter_btn = gr.Button("Filter")

        # The dataset name and split are bound up front; Gradio supplies the rest.
        filter_btn.click(
            fn=partial(filter_by, args.dataset, args.split),
            inputs=[*success_boxes, problem_box, student_box, *components],
            outputs=[dataset, *components],
        )

    demo.launch(share=args.share)
180
+
181
if __name__ == "__main__":
    # Command-line entry point: parse the options and start the viewer.
    parser = argparse.ArgumentParser(
        description="Browse a dataset of prompts and completions in a Gradio app.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="nuprl-staging/studenteval_tagged_prompts",
        help="dataset name passed to load_dataset (default: %(default)s)",
    )
    parser.add_argument(
        "--split",
        type=str,
        default="test",
        help="dataset split to load (default: %(default)s)",
    )
    parser.add_argument(
        "--share",
        action="store_true",
        help="forwarded to demo.launch(share=...)",
    )
    args = parser.parse_args()
    main(args)