Spaces:
Sleeping
Sleeping
File size: 9,044 Bytes
67741f2 bae4131 3119795 e3a07b7 3119795 67741f2 7ccf9d4 67741f2 d1ed69b 7ccf9d4 bae4131 570d85c 089a447 bae4131 67741f2 6454c0e e3a07b7 d50990e e3a07b7 d50990e e3a07b7 d50990e d1ed69b 6454c0e 3119795 e3a07b7 089a447 e3a07b7 d50990e 7ccf9d4 e3a07b7 7ccf9d4 089a447 bae4131 570d85c 25580aa 089a447 67741f2 bae4131 67741f2 bae4131 089a447 3d76e98 23510fc 3d76e98 4a9b060 3d76e98 7ccf9d4 3d76e98 7ccf9d4 23510fc 089a447 d50990e 7ccf9d4 bae4131 67741f2 bae4131 d50990e e3a07b7 089a447 d50990e 67741f2 d50990e 25580aa d50990e 67741f2 d50990e 67741f2 d50990e 67741f2 d50990e 67741f2 d50990e 570d85c 67741f2 570d85c 67741f2 d1ed69b 089a447 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
import asyncio
import os
import sys
import time
import gradio as gr
from datasets import load_dataset
from huggingface_hub import whoami
from loguru import logger
from pathlib import Path
from yourbench_space.config import generate_and_save_config
from yourbench_space.utils import (
CONFIG_PATH,
UPLOAD_DIRECTORY,
SubprocessManager,
save_files,
update_dataset,
STAGES,
)
from yourbench_space.evaluation import create_eval_file, run_evaluations
from yourbench_space.leaderboard_space.env import HF_TOKEN
# Markdown shown at the top of the app; it is passed to gr.Markdown when the
# Blocks layout is built further down in this file.
project_description = """
# YourBench 🚀
**Dynamic Benchmark Generation for Language Models**
Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
- 📖 [FAQ](#)
- 💻 [GitHub](https://github.com/huggingface/yourbench/tree/v0.2-alpha-space)
"""
# Create the upload directory (including missing parents) if it is not there yet.
UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
# Drop the handlers loguru currently has and log to stderr at INFO level instead.
logger.remove()
logger.add(sys.stderr, level="INFO")
# Command line handed to the subprocess manager; it points yourbench at CONFIG_PATH.
command = ["uv", "run", "yourbench", f"--config={CONFIG_PATH}"]
# Single module-level manager instance used by the UI callbacks in this file.
manager = SubprocessManager(command)
# Citation text: whatever follows the last "# Citation" marker in docs.md
# (the whole file if the marker never appears), or a placeholder when docs.md
# does not exist next to this module.
docs_path = Path(__file__).parent / "docs.md"
if docs_path.exists():
    # Decode explicitly as UTF-8 so the result does not depend on the host locale.
    citation_content = (
        docs_path.read_text(encoding="utf-8").split("# Citation")[-1].strip()
    )
else:
    citation_content = "# Citation\n\nDocumentation file not found."
def generate_and_return(hf_org, hf_prefix):
    """Generate the config file and report the outcome to the UI.

    Calls ``generate_and_save_config(hf_org, hf_prefix)``, then checks up to
    five times (sleeping 0.5 s after each miss) whether ``CONFIG_PATH`` exists.

    Returns:
        A ``(message, update)`` pair: a status message plus a ``gr.update``
        that either exposes ``CONFIG_PATH`` as a visible, interactive value or
        hides the component when the file is still missing.
    """
    generate_and_save_config(hf_org, hf_prefix)

    # Wait briefly for the file to show up on disk.
    checks_left = 5
    while checks_left > 0 and not CONFIG_PATH.exists():
        time.sleep(0.5)
        checks_left -= 1

    if not CONFIG_PATH.exists():
        failure_update = gr.update(visible=False, interactive=False)
        return ("❌ Config generation failed.", failure_update)

    success_update = gr.update(value=str(CONFIG_PATH), visible=True, interactive=True)
    return ("✅ Config saved!", success_update)
# Module-level placeholder; nothing in the visible part of this file reads or reassigns it.
final_dataset = None
def update_process_status():
    """Return a ``gr.update`` describing whether the managed process is running.

    While the process runs, the value is ``True`` with a "Running" label.
    Once it has stopped, the value is ``False`` and the label carries the exit
    reason and exit code when the manager reports a reason.
    """
    if manager.is_running():
        return gr.update(value=True, label="Process Status: Running")

    exit_code, exit_reason = manager.get_exit_details()
    if exit_reason:
        label = f"Process Status: Stopped - {exit_reason}, exit code - {exit_code}"
    else:
        label = "Process Status: Stopped"
    return gr.update(value=False, label=label)
def prepare_task(oauth_token: gr.OAuthToken | None, hf_dataset_name: str, _=None):
    """Start the managed process with an environment derived from ``os.environ``.

    Args:
        oauth_token: When truthy, its ``token`` is exported as ``HF_TOKEN``.
        hf_dataset_name: Exported as ``DATASET_PREFIX``.
        _: Accepted and ignored.
    """
    overrides = {"DATASET_PREFIX": hf_dataset_name}
    if oauth_token:
        overrides["HF_TOKEN"] = oauth_token.token

    # Copy of the current environment with the overrides layered on top.
    child_env = {**os.environ, **overrides}
    manager.start_process(custom_env=child_env)
def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None):
    """Build the "Organization" dropdown for the current user.

    Args:
        oauth_token: OAuth token of the logged-in user, or ``None`` when
            nobody is logged in.

    Returns:
        A ``gr.Dropdown`` listing the user's own name first (preselected)
        followed by the names of their organizations. The dropdown is empty
        when there is no token or when the account lookup fails.
    """
    if oauth_token is None:
        return gr.Dropdown([], label="Organization")

    try:
        user_info = whoami(oauth_token.token)
        user_name = user_info.get("name", "Unknown User")
        # The user's own namespace comes first so it can be the default choice.
        org_names = [user_name, *(org["name"] for org in user_info.get("orgs", []))]
        return gr.Dropdown(org_names, value=user_name, label="Organization")
    except Exception as exc:
        # Best effort: keep the UI usable, but leave a trace of what went wrong
        # instead of discarding the error silently.
        logger.warning("Could not load Hugging Face organizations: {}", exc)
        return gr.Dropdown([], label="Organization")
def switch_to_run_generation_tab():
    """Return a ``gr.Tabs`` update that selects the tab whose id is 1."""
    target_tab_id = 1
    return gr.Tabs(selected=target_tab_id)
def enable_button(files):
    """Return a ``gr.update`` that is interactive only when ``files`` is truthy."""
    has_files = bool(files)
    return gr.update(interactive=has_files)
def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_name):
    """Run evaluations on a dataset and create a leaderboard Space for it.

    Args:
        oauth_token: OAuth token of the logged-in user; not used by this
            function (the Space secret is filled from ``HF_TOKEN``).
        org_name: Hub namespace that owns the dataset and will own the Space.
        eval_name: Dataset name inside ``org_name``.

    Returns:
        A status string: the value returned by ``run_evaluations`` when
        everything succeeds, that value combined with the error when the
        Space setup fails, or an error message when the dataset cannot be
        loaded.
    """
    eval_ds_name = f"{org_name}/{eval_name}"

    # Test dataset existence
    try:
        load_dataset(eval_ds_name, streaming=True)
    except Exception as e:
        # Return the message so the caller can display it, rather than
        # returning None and leaving the status output blank.
        message = f"Error while loading the dataset: {e}"
        logger.error("Error while loading the dataset: {}", e)
        return message

    # Run evaluations
    create_eval_file(eval_ds_name)
    status = asyncio.run(run_evaluations(eval_ds_name=eval_ds_name, org=org_name))

    # Create space
    from huggingface_hub import HfApi

    repo_id = f"{org_name}/leaderboard_yourbench_{eval_ds_name.replace('/', '_')}"
    api = HfApi()
    try:
        api.create_repo(repo_id=repo_id, repo_type="space", space_sdk="gradio")
        api.upload_folder(repo_id=repo_id, repo_type="space", folder_path="src/")
        api.add_space_secret(repo_id=repo_id, key="HF_TOKEN", value=HF_TOKEN)
        api.add_space_variable(repo_id=repo_id, key="TASK", value=eval_ds_name)
        api.add_space_variable(repo_id=repo_id, key="ORG_NAME", value=org_name)
    except Exception as e:
        logger.error("Leaderboard creation failed for {}: {}", repo_id, e)
        # Format through an f-string: concatenating a str with the exception
        # object itself raises TypeError inside this handler.
        status = f"Evaluation{status}\nLeaderboard creation:{e}"
    return status
# Gradio UI: three tabs (Setup, Run Generation, Evaluate) wired to the callbacks
# defined above.
# NOTE(review): the container nesting below is a best reading of the layout —
# confirm against the rendered UI.
with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown(project_description)
    with gr.Tabs() as tabs:
        # --- Tab 0: login, dataset naming, document upload, config generation ---
        with gr.Tab("Setup", id=0):
            with gr.Row():
                with gr.Column():
                    login_btn = gr.LoginButton()
                    with gr.Accordion("Hugging Face Settings"):
                        hf_org_dropdown = gr.Dropdown(
                            choices=[], label="Organization", allow_custom_value=True
                        )
                        # Fill the organization list when the page loads.
                        app.load(
                            update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown
                        )
                        hf_dataset_name = gr.Textbox(
                            label="Dataset name",
                            value="yourbench",
                            info="Name of your new evaluation dataset",
                        )
                with gr.Accordion("Upload documents"):
                    file_input = gr.File(
                        label="Upload text files",
                        file_count="multiple",
                        file_types=[".txt", ".md", ".html", ".pdf"],
                    )
                    output = gr.Textbox(label="Log")
                    # Pass the uploaded files' names to save_files and show its result in the log box.
                    file_input.upload(
                        lambda files: save_files([file.name for file in files]),
                        file_input,
                        output,
                    )
            with gr.Row():
                # Starts disabled; enable_button turns it on once files are present.
                preview_button = gr.Button("Generate New Config", interactive=False)
                log_message = gr.Textbox(label="Log Message", visible=True)
                download_button = gr.File(
                    label="Download Config", visible=False, interactive=False
                )
            file_input.change(enable_button, inputs=file_input, outputs=preview_button)
            # One click triggers two handlers: generate the config, and jump to tab 1.
            preview_button.click(
                generate_and_return,
                inputs=[hf_org_dropdown, hf_dataset_name],
                outputs=[log_message, download_button],
            )
            preview_button.click(
                switch_to_run_generation_tab,
                inputs=None,
                outputs=tabs,
            )
        # --- Tab 1: process control, live log, stage progress and dataset previews ---
        with gr.Tab("Run Generation", id=1):
            with gr.Row():
                start_button = gr.Button("Start Task")
                # NOTE(review): prepare_task declares a gr.OAuthToken parameter, which
                # Gradio is documented to supply itself; confirm that also listing
                # login_btn here still maps hf_dataset_name to the intended value.
                start_button.click(prepare_task, inputs=[login_btn, hf_dataset_name])
                stop_button = gr.Button("Stop Task")
                stop_button.click(manager.stop_process)
                kill_button = gr.Button("Kill Task")
                kill_button.click(manager.kill_process)
            with gr.Column():
                with gr.Row():
                    with gr.Accordion("Log Output", open=True):
                        log_output = gr.Code(language=None, lines=20, interactive=False)
                with gr.Row():
                    process_status = gr.Checkbox(label="Process Status", interactive=False)
                    # Refresh the status checkbox once per second.
                    status_timer = gr.Timer(1.0, active=True)
                    status_timer.tick(update_process_status, outputs=process_status)
            with gr.Column():
                with gr.Accordion("Stages", open=True):
                    stages_table = gr.CheckboxGroup(
                        choices=STAGES,
                        value=[],
                        label="Pipeline Stages Completed",
                        interactive=False,
                    )
                with gr.Accordion("Ingestion"):
                    ingestion_df = gr.DataFrame()
                with gr.Accordion("Summarization"):
                    summarization_df = gr.DataFrame()
                with gr.Accordion("Single-Hop"):
                    single_hop = gr.DataFrame()
                with gr.Accordion("Answer Generation"):
                    answers_df = gr.DataFrame()
                # Whenever the completed-stage list changes, update the four preview tables.
                stages_table.change(
                    update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_name], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
                )
            # Once per second, send the manager's output to the log box and the stage list.
            log_timer = gr.Timer(1.0, active=True)
            log_timer.tick(
                manager.read_and_get_output, outputs=[log_output, stages_table]
            )
        # --- Tab 2: run evaluations and show the resulting status string ---
        with gr.Tab("Evaluate", id=2):
            with gr.Row():
                btn_launch_evals = gr.Button("Launch evaluations")
                status = gr.Textbox(label="Status")
                btn_launch_evals.click(run_evaluation_pipeline, [hf_org_dropdown, hf_dataset_name], status)
# Start the Gradio server; allowed_paths permits Gradio to serve files located under /app.
app.launch(allowed_paths=["/app"])
|