Spaces:

HumanEval-V
/

HumanEval-V-Benchmark-Viewer

Running

App Files Files Community

HumanEval-V-Benchmark-Viewer / app.py

anonymous-researcher912

add instruction on reporting issues

3dc6935 18 days ago

raw

history blame

1.89 kB

	import streamlit as st
	import datasets

	# HumanEval-V benchmark viewer: a single-page Streamlit script that lets the
	# user pick a task by 1-based index and shows its image, function signature,
	# test script and ground-truth solution.


	@st.cache_resource
	def _load_benchmark(path="humaneval_v_test_hf"):
	    """Load the benchmark dataset saved at *path*.

	    Streamlit re-executes this script on every widget interaction; caching
	    the loaded dataset avoids re-reading it from disk on each rerun.
	    """
	    return datasets.load_from_disk(path)


	def _show_code_section(title, source):
	    """Render a level-3 heading followed by *source* as a Python code block.

	    st.code is used instead of interpolating into a Markdown fence so that
	    the snippet is displayed verbatim whatever characters it contains.
	    """
	    st.markdown(f"### {title}:")
	    st.code(source, language="python")


	# Kept ahead of every other Streamlit call (including the cached loader):
	# Streamlit documents set_page_config as having to be the first command.
	st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")

	humaneval_v_data = _load_benchmark()

	st.markdown("> <i>This is a viewer for the HumanEval-V benchmark, which consists of 108 coding tasks. Use the navigation buttons or enter an index to browse the tasks. If you encounter any issues, we encourage you to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
	st.markdown("---")

	# Derive the upper bound from the data so the widget range can never drift
	# from what is actually indexable.
	max_index = len(humaneval_v_data)
	if max_index == 0:
	    # number_input would be built with min_value > max_value below.
	    st.error("The HumanEval-V dataset is empty; there is nothing to display.")
	    st.stop()

	# Initialize session state for index if not present
	if "index" not in st.session_state:
	    st.session_state.index = 1

	buttons = st.columns([2, 1.1, 5.9])

	with buttons[0]:
	    st.markdown("# HumanEval-V Viewer")

	with buttons[1]:
	    # Number input for navigation (1-based, as shown to the user)
	    index_input = st.number_input(
	        f"Go to index (1-{max_index}):",
	        min_value=1,
	        max_value=max_index,
	        value=st.session_state.index,
	        key="index_input",
	        help="Enter an index and jump to that index.",
	        step=1,
	    )

	# The widget is 1-based; the dataset is 0-based.
	coding_task = humaneval_v_data[index_input - 1]
	qid = coding_task["qid"]
	image = coding_task["image"]
	function_signature = coding_task["function_signature"]
	ground_truth = coding_task["ground_truth_solution"]
	test_script = coding_task["test_script"]

	upper_columns = st.columns([2, 7])

	with upper_columns[0]:
	    st.markdown(f"### Question ID: {qid}")
	    # NOTE(review): newer Streamlit releases deprecate use_column_width;
	    # confirm the pinned Streamlit version before switching parameters.
	    st.image(image, use_column_width=True)
	    st.markdown("---")

	with upper_columns[1]:
	    _show_code_section("Function Signature", function_signature)
	    _show_code_section("Test Script", test_script)
	    _show_code_section("Ground Truth Solution", ground_truth)