anonymous-researcher912
committed on
Commit
•
cf3b6c5
1
Parent(s):
2f09d3b
initialize
Browse files- app.py +65 -0
- humaneval_v_test_hf/data-00000-of-00001.arrow +3 -0
- humaneval_v_test_hf/dataset_info.json +57 -0
- humaneval_v_test_hf/state.json +13 -0
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import datasets
|
3 |
+
|
4 |
+
# HumanEval-V Viewer: a single-page Streamlit app that shows one benchmark
# task at a time (image, function signature, test script, reference solution).

# Load the dataset saved next to this script.
humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")
st.markdown("---")

# Derive the upper bound from the data instead of hard-coding it, so the
# label, the widget limit and the dataset can never disagree.
max_index = len(humaneval_v_data)

# The current task is tracked as a 1-based index in the session state.
if "index" not in st.session_state:
    st.session_state.index = 1

buttons = st.columns([2, 1, 6])

with buttons[1]:
    # Number input for navigation
    index_input = st.number_input(
        f"Go to index (1-{max_index}):",
        min_value=1,
        max_value=max_index,
        value=st.session_state.index,
        key="index_input",
        help="Enter an index and jump to that index.",
        step=1,  # Increment by 1
    )

with buttons[0]:
    st.markdown("# HumanEval-V Viewer")

# When the widget value differs from the stored index, store it and restart
# the script so the page is rendered for the newly selected task.
if index_input != st.session_state.index:
    st.session_state.index = index_input
    # `st.experimental_rerun` was superseded by `st.rerun`; prefer the new
    # name and fall back to the old one on Streamlit versions that lack it.
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()


def _render_code_section(title, code):
    """Render a level-3 heading followed by *code* in a Python fenced block."""
    st.markdown(f"### {title}:")
    st.markdown("")
    st.markdown(f"```python\n{code}\n```")


# The UI index is 1-based; dataset rows are 0-based.
coding_task = humaneval_v_data[st.session_state.index - 1]
qid = coding_task["qid"]
image = coding_task["image"]
function_signature = coding_task["function_signature"]
ground_truth = coding_task["ground_truth_solution"]
test_script = coding_task["test_script"]

upper_columns = st.columns([2, 7])
with upper_columns[0]:
    st.markdown(f"### Question ID: {qid}")
    st.image(image, use_column_width=True)
    st.markdown("---")
with upper_columns[1]:
    _render_code_section("Function Signature", function_signature)
    _render_code_section("Test Script", test_script)
    _render_code_section("Ground Truth Solution", ground_truth)
|
humaneval_v_test_hf/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:774e4cacfc259917fb5a5e8433e6cacbcac01063cb30fd3560170b3a0a9fa76e
|
3 |
+
size 12842912
|
humaneval_v_test_hf/dataset_info.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"builder_name": "parquet",
|
3 |
+
"citation": "",
|
4 |
+
"config_name": "default",
|
5 |
+
"dataset_name": "human_eval-v-benchmark",
|
6 |
+
"dataset_size": 12841384,
|
7 |
+
"description": "",
|
8 |
+
"download_checksums": {
|
9 |
+
"hf://datasets/HumanEval-V/HumanEval-V-Benchmark@50af2be232641ca618f6aecce901ca5e5a83b20e/data/test-00000-of-00001.parquet": {
|
10 |
+
"num_bytes": 12571814,
|
11 |
+
"checksum": null
|
12 |
+
}
|
13 |
+
},
|
14 |
+
"download_size": 12571814,
|
15 |
+
"features": {
|
16 |
+
"qid": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"ground_truth_solution": {
|
21 |
+
"dtype": "string",
|
22 |
+
"_type": "Value"
|
23 |
+
},
|
24 |
+
"image_description": {
|
25 |
+
"dtype": "string",
|
26 |
+
"_type": "Value"
|
27 |
+
},
|
28 |
+
"test_script": {
|
29 |
+
"dtype": "string",
|
30 |
+
"_type": "Value"
|
31 |
+
},
|
32 |
+
"function_signature": {
|
33 |
+
"dtype": "string",
|
34 |
+
"_type": "Value"
|
35 |
+
},
|
36 |
+
"image": {
|
37 |
+
"_type": "Image"
|
38 |
+
}
|
39 |
+
},
|
40 |
+
"homepage": "",
|
41 |
+
"license": "",
|
42 |
+
"size_in_bytes": 25413198,
|
43 |
+
"splits": {
|
44 |
+
"test": {
|
45 |
+
"name": "test",
|
46 |
+
"num_bytes": 12841384,
|
47 |
+
"num_examples": 108,
|
48 |
+
"dataset_name": "human_eval-v-benchmark"
|
49 |
+
}
|
50 |
+
},
|
51 |
+
"version": {
|
52 |
+
"version_str": "0.0.0",
|
53 |
+
"major": 0,
|
54 |
+
"minor": 0,
|
55 |
+
"patch": 0
|
56 |
+
}
|
57 |
+
}
|
humaneval_v_test_hf/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "d8ffc8935ede93f4",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": "test"
|
13 |
+
}
|