# LightEval Tasks Explorer — Gradio Space (4.25 kB).
# Provenance: initial upload by guipenedo (HF staff), commit 28d348e (unverified).
import ast
import json
import urllib
import urllib.request  # "import urllib" alone does not make urllib.request available

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns, ColumnFilter
# Load the lighteval task registry. Each task may belong to several suites, so
# "suite" is exploded to give one (suite, task) row per pair.
df = pd.read_json(
    "https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_table.jsonl",
    lines=True,
).explode("suite").reset_index(drop=True)

# Fetch the module that defines every task prompt function so the inspector tab
# can display the relevant source snippet.
# NOTE: the original file only did `import urllib`, which does not load the
# `urllib.request` submodule — `import urllib.request` is now in the imports.
with urllib.request.urlopen(
    "https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_prompt_formatting.py"
) as response:
    tasks_prompt_functions_raw = response.read().decode("utf-8")

# Map every function name found in that module to its exact source segment.
# ast.walk visits nested functions too, which is harmless here: later lookups
# go through .get() and unknown names simply yield None.
tasks_prompt_functions = {
    node.name: ast.get_source_segment(tasks_prompt_functions_raw, node)
    for node in ast.walk(ast.parse(tasks_prompt_functions_raw))
    if isinstance(node, ast.FunctionDef)
}
def load_task_metadata(task_id):
    """Return display data for the task at positional index *task_id* in ``df``.

    Returns a 3-tuple feeding the ``task_metadata``, ``task_dataset_header``
    and ``task_prompt_function`` components, in that order:
    the task definition as a dict, a markdown header linking the HF repo, and
    the source of the task's prompt function (or None if it wasn't extracted).

    The original returned a stray 4th value ("unknown") even though the event
    handler only wires 3 outputs; it has been removed.
    """
    task_row = df.iloc[task_id]
    header = (
        f"Examples from the HF repository "
        f"([{task_row['hf_repo']}](https://huggingface.co/datasets/{task_row['hf_repo']}))"
    )
    # .get(): a task may reference a prompt function we failed to extract;
    # gr.Code renders None as an empty editor instead of crashing.
    return task_row.to_dict(), header, tasks_prompt_functions.get(task_row["prompt_function"])
def load_task_examples(task_id):
    """Stream up to 20 evaluation examples for the task at index *task_id* in
    ``df`` and return them as a DataFrame with one row per example.

    Nested ``dict``/``list`` cell values are JSON-encoded so they render as
    plain text in the gr.Dataframe component.
    """
    task_row = df.iloc[task_id]
    # Streaming avoids downloading the full dataset just to preview a few rows.
    dataset = load_dataset(
        task_row["hf_repo"],
        task_row["hf_subset"],
        split="+".join(task_row["evaluation_splits"]),
        trust_remote_code=task_row["trust_dataset"],
        streaming=True,
    )
    # dataset.iter(20) yields batches as a dict of {column: list_of_values}.
    sample_data = next(dataset.iter(20))
    columns = list(sample_data)
    # Convert the dict of lists into a list of row dicts, JSON-encoding nested
    # structures *per cell*. (The original tested the whole row tuple with
    # isinstance(t, dict/list) — always False for a tuple — so the intended
    # conversion never fired.)
    rows = [
        {
            column: json.dumps(value) if isinstance(value, (dict, list)) else value
            for column, value in zip(columns, values)
        }
        for values in zip(*sample_data.values())
    ]
    return pd.DataFrame(rows)
# Gradio UI: a two-tab app — a leaderboard-style table of all tasks, and a
# per-task inspector showing metadata, prompt-function source, and examples.
with gr.Blocks() as demo:
    gr.Markdown("""
# LightEval Tasks Explorer
""")
    with gr.Tabs() as tabs:
        with gr.TabItem("πŸ—ƒοΈ Tasks List"):
            # Searchable / filterable / column-selectable view over the full df.
            Leaderboard(
                value=df,
                select_columns=SelectColumns(default_selection=["name", "suite", "prompt_function", "hf_repo", "hf_subset", "evaluation_splits", "metric"],
                                             cant_deselect=["name", "suite"],
                                             label="Columns to display"),
                search_columns=SearchColumns(primary_column="name", secondary_columns=["suite", "prompt_function", "hf_repo", "metric"],
                                             placeholder="Search for a task by name, suite, prompt_function, hf_repo or "
                                                         "metric. To search by suite, for example, type 'suite:<query>'. Separate queries by \";\"",
                                             label="Search"),
                filter_columns=[
                    ColumnFilter("suite", type="dropdown", label="Select suite"),
                    # ColumnFilter("prompt_function", type="dropdown", label="Select prompt_function"),
                    # ColumnFilter("metric", type="dropdown", label="Select metric")
                ],
                wrap=True
            )
        with gr.TabItem("πŸ”Ž Task Inspector"):
            # Dropdown value is the row index into df; the label is "suite|name".
            task_inspector_selector = gr.Dropdown(
                choices=sorted(zip((df['suite'] + '|' + df['name']).tolist(), range(len(df)))),
                label="Task",
                info="Select a task"
            )
            with gr.Row():
                with gr.Column():
                    task_metadata = gr.Json(label="Task definition")
                with gr.Column():
                    task_prompt_function = gr.Code(label="Task prompt function", language="python", interactive=False)
            task_dataset_header = gr.Markdown("Examples from the HF repository")
            task_dataset = gr.Dataframe(wrap=True)
    # Two separate handlers on the same trigger so the (fast) metadata shows up
    # while the dataset examples are still streaming.
    gr.on(triggers=[task_inspector_selector.change], inputs=[task_inspector_selector], outputs=[task_metadata, task_dataset_header, task_prompt_function], fn=load_task_metadata)
    gr.on(triggers=[task_inspector_selector.change], inputs=[task_inspector_selector], outputs=[task_dataset], fn=load_task_examples)
# Start the Gradio server only when this file is run directly (not on import).
if __name__ == "__main__":
    demo.launch()