# Source file of a Hugging Face Space (page status when captured: "Runtime error").
# File size: 6,089 bytes.
import gradio as gr
from datasets import load_dataset
import argparse
import pandas as pd
from functools import partial
import subprocess
# Columns shown in the per-example header table. DATATYPES lists the Gradio
# datatype for the column at the same position in HEADERS.
HEADERS = [
    "__index_level_0__",
    "problem",
    "username",
    "entrypoint",
    "submitted_text",
    "prompt",
    "subset",
]
# The first column is numeric; every remaining column is displayed as text.
DATATYPES = ["number"] + ["str"] * (len(HEADERS) - 1)

# Columns (and their datatypes) shown in the second, single-column table.
SUCCESS_HEADERS = ["subset"]
SUCCESS_DATATYPES = ["str"] * len(SUCCESS_HEADERS)
def capture_output(prompt, completion, prints):
    """Run prompt + completion + print statements and capture the result.

    The three pieces of source text are joined into a single program, which
    is executed in a separate Python process via ``python -c``.

    Args:
        prompt: Source text placed at the start of the program.
        completion: Source text placed after the prompt.
        prints: Source text appended at the end of the program.

    Returns:
        A ``(stderr, stdout)`` tuple of Gradio components: a ``gr.Textbox``
        labelled "Code Errors" holding the stripped standard error, and a
        ``gr.Code`` labelled "Code Outputs" holding the stripped standard
        output.
    """
    import sys

    # NOTE(review): the separator puts a space *before* each newline, so only
    # the first completion line gains a leading space -- confirm this is the
    # intended indentation of the completion.
    completion_body = " " + " \n".join(completion.split("\n"))
    code = "\n".join([prompt, completion_body, prints])

    # NOTE(security): this executes whatever code the arguments contain; only
    # call it with trusted input or inside a sandbox.
    # Use the interpreter running this app rather than whatever "python"
    # happens to resolve to on PATH (it may be missing or a different version).
    interpreter = sys.executable or "python"
    outputs = subprocess.run(
        [interpreter, "-c", code],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    stderr = gr.Textbox(
        outputs.stderr.decode("utf-8").strip(),
        label="Code Errors",
        type="text",
    )
    stdout = gr.Code(
        outputs.stdout.decode("utf-8").strip(),
        label="Code Outputs",
        language="python",
    )
    return stderr, stdout
def update_components(
    ds,
    slider,
    header_data,
    success_data,
    prompt,
    submitted_text,
    assertions,
):
    """Rebuild all display components for the example at position ``slider``.

    Args:
        ds: The examples as a DataFrame, or a ``gr.State`` wrapping one (in
            which case its ``value`` is used).
        slider: Positional index of the example to show.
        header_data, success_data, prompt, submitted_text, assertions:
            Current values of the other components. They are accepted so the
            function can be wired with identical inputs and outputs, but they
            are not read; each is replaced by a fresh component.

    Returns:
        A list ``[slider, header_data, success_data, prompt, submitted_text,
        assertions]`` of newly constructed Gradio components.
    """
    frame = ds.value if isinstance(ds, gr.State) else ds

    # Double brackets keep the selection as a one-row DataFrame, which is what
    # the two tables below are given as their value.
    selected = frame.iloc[[slider]]

    def one_row_table(columns, datatypes):
        # Read-only, single-row table restricted to the given columns.
        return gr.Dataframe(
            value=selected[columns],
            headers=columns,
            datatype=datatypes,
            row_count=1,
            col_count=(len(columns), "fixed"),
            column_widths=["60px"] * len(columns),
            interactive=False,
        )

    header_table = one_row_table(HEADERS, DATATYPES)
    success_table = one_row_table(SUCCESS_HEADERS, SUCCESS_DATATYPES)

    # Switch to a Series so individual fields can be read by column name.
    record = selected.iloc[0]
    prompt_view = gr.Code(record["prompt"], language="python", label="Prompt")
    text_view = gr.Textbox(
        record["submitted_text"], type="text", label="Submitted Text"
    )
    assertions_view = gr.Code(
        record["assertions"], language="python", label="Assertions"
    )

    # The slider range is recomputed from the (possibly filtered) data.
    position = gr.Slider(
        0,
        len(frame) - 1,
        step=1,
        label="Problem ID (click and arrow keys to navigate):",
        value=slider,
    )
    return [
        position,
        header_table,
        success_table,
        prompt_view,
        text_view,
        assertions_view,
    ]
def filter_by(
        dataset_name,
        dataset_split,
        problem_box,
        student_box,
        slider,
        *components_to_update):
    """Reload the dataset, apply the selected filters and show the first match.

    The dataset is loaded afresh on every call, so each call filters the full
    split rather than the result of a previous filter.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        dataset_split: Split name passed to ``load_dataset``.
        problem_box: Value to match against the "problem" column, or ``None``
            to leave that column unfiltered.
        student_box: Value to match against the "username" column, or ``None``
            to leave that column unfiltered.
        slider: Current slider value; not read, because the view is reset to
            the first matching example.
        *components_to_update: Remaining component values, forwarded to
            ``update_components``.

    Returns:
        A list whose first element is the filtered DataFrame, followed by the
        components returned by ``update_components`` for position 0.

    Raises:
        gr.Error: If no example matches the selected filters.
    """
    ds = load_dataset(dataset_name, split=dataset_split).to_pandas()

    if problem_box is not None:
        ds = ds[ds["problem"] == problem_box]
    if student_box is not None:
        ds = ds[ds["username"] == student_box]

    # Without this guard an empty result fails inside update_components with
    # an out-of-bounds positional index; report it in readable form instead.
    if len(ds) == 0:
        raise gr.Error("No examples match the selected filters.")

    return [ds, *update_components(ds, 0, *components_to_update)]
def next_example(ds, *components):
    """Move the view one example forward, stopping at the last example.

    Args:
        ds: The examples currently being browsed.
        *components: Component values; the first is the current slider value
            and the rest are forwarded unchanged.

    Returns:
        Whatever ``update_components`` returns for the new position.
    """
    current = components[0]
    others = components[1:]

    last_index = len(ds) - 1
    if current < last_index:
        target = int(current) + 1
    else:
        # Already at (or beyond) the end: stay on the last example.
        target = last_index

    return update_components(ds, target, *others)
def prev_example(ds, *components):
    """Move the view one example back, stopping at the first example.

    Args:
        ds: The examples currently being browsed.
        *components: Component values; the first is the current slider value
            and the rest are forwarded unchanged.

    Returns:
        Whatever ``update_components`` returns for the new position.
    """
    current = components[0]
    others = components[1:]

    if current > 0:
        target = int(current) - 1
    else:
        # Already at (or before) the start: stay on the first example.
        target = 0

    return update_components(ds, target, *others)
def main(args):
    """Build the Gradio viewer for the dataset and launch it.

    Args:
        args: Parsed command-line namespace providing ``dataset`` and
            ``split`` (passed to ``load_dataset``) and ``share`` (passed to
            ``demo.launch``).
    """
    ds = load_dataset(args.dataset, split=args.split).to_pandas()

    # Usernames are ordered by the integer left after removing "student".
    student_usernames = sorted(
        set(ds["username"]),
        key=lambda name: int(name.replace("student", "")),
    )
    problem_names = sorted(set(ds["problem"]))

    # NOTE(review): the nesting of the layout blocks below was reconstructed
    # from a source whose indentation had been lost -- confirm that each
    # component sits in the intended Row/Column.
    with gr.Blocks(theme="gradio/monochrome") as demo:
        dataset = gr.State(ds)

        # slider for selecting problem id
        slider = gr.Slider(
            0,
            len(ds) - 1,
            step=1,
            label="Problem ID (click and arrow keys to navigate):",
        )

        # display headers in dataframe for problem id
        header_data = gr.Dataframe(
            headers=HEADERS,
            datatype=DATATYPES,
            row_count=1,
            col_count=(len(HEADERS), "fixed"),
            column_widths=["60px"] * len(HEADERS),
            interactive=False,
        )
        success_data = gr.Dataframe(
            headers=SUCCESS_HEADERS,
            datatype=SUCCESS_DATATYPES,
            row_count=1,
            col_count=(len(SUCCESS_HEADERS), "fixed"),
            column_widths=["60px"] * len(SUCCESS_HEADERS),
            interactive=False,
        )

        # The double-underscore strings are placeholders shown until the
        # first update replaces the components.
        prompt = gr.Code("__prompt__", language="python", label="Prompt")
        with gr.Row():
            prev_btn = gr.Button("Previous")
            next_btn = gr.Button("Next")
        submitted_text = gr.Textbox(
            "__submitted_text__", type="text", label="Submitted Text"
        )
        with gr.Row():
            assertions = gr.Code(
                "__assertions__", language="python", label="Assertions"
            )

        # updates
        # change example on slider change
        components = [
            slider,
            header_data,
            success_data,
            prompt,
            submitted_text,
            assertions,
        ]
        slider.input(
            fn=update_components,
            inputs=[dataset, *components],
            outputs=components,
        )
        prev_btn.click(
            fn=prev_example,
            inputs=[dataset, *components],
            outputs=components,
        )
        next_btn.click(
            fn=next_example,
            inputs=[dataset, *components],
            outputs=components,
        )

        # add filtering options
        gr.Markdown("**Filtering (reload to clear all filters)**\n")
        with gr.Row():
            with gr.Column():
                problem_box = gr.Dropdown(
                    label="problem", choices=problem_names
                )
                student_box = gr.Dropdown(
                    label="username", choices=student_usernames
                )
        filter_btn = gr.Button("Filter")
        # The dataset name and split are bound up front, so the handler only
        # receives the two dropdown values plus the shared components.
        filter_btn.click(
            fn=partial(filter_by, args.dataset, args.split),
            inputs=[problem_box, student_box, *components],
            outputs=[dataset, *components],
        )

    demo.launch(share=args.share)
if __name__ == "__main__":
    # Command-line entry point: parse the options and start the viewer.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="nuprl-staging/studenteval_tagged_prompts",
        help="dataset identifier passed to load_dataset",
    )
    parser.add_argument(
        "--split",
        type=str,
        default="test",
        help="dataset split to display",
    )
    parser.add_argument(
        "--share",
        action="store_true",
        help="forwarded to demo.launch(share=...)",
    )
    args = parser.parse_args()
    main(args)