Spaces:

autoevaluate
/

model-evaluator

Tristan Thrush

added url redirect for dataset selection

d7af059 over 2 years ago

4.74 kB

	import os
	from pathlib import Path

	import streamlit as st
	from dotenv import load_dotenv

	from utils import get_compatible_models, get_metadata, http_post

	if Path(".env").is_file():
	load_dotenv(".env")

	HF_TOKEN = os.getenv("HF_TOKEN")
	AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
	AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")


	TASK_TO_ID = {
	"binary_classification": 1,
	"multi_class_classification": 2,
	"multi_label_classification": 3,
	"entity_extraction": 4,
	"extractive_question_answering": 5,
	"translation": 6,
	"summarization": 8,
	"single_column_regression": 10,
	}

	# TODO: remove this hardcorded logic and accept any dataset on the Hub
	DATASETS_TO_EVALUATE = ["emotion", "conll2003", "imdb", "squad", "xsum", "ncbi_disease", "go_emotions"]

	###########
	### APP ###
	###########
	st.title("Evaluation as a Service")
	st.markdown(
	"""
	Welcome to Hugging Face's Evaluation as a Service! This application allows
	you to evaluate any 🤗 Transformers model with a dataset on the Hub. Please
	select the dataset and configuration below. The results of your evaluation
	will be displayed on the public leaderboard
	[here](https://huggingface.co/spaces/autoevaluate/leaderboards).
	"""
	)

	selectable_datasets = [f"lewtun/autoevaluate__{dset}" for dset in DATASETS_TO_EVALUATE]

	query_params = st.experimental_get_query_params()
	default_dataset = selectable_datasets[0]
	if "dataset" in query_params:
	if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
	default_dataset = query_params["dataset"][0]

	dataset_name = st.selectbox(
	"Select a dataset",
	selectable_datasets,
	index=selectable_datasets.index(default_dataset))

	# TODO: remove this step once we select real datasets
	# Strip out original dataset name
	original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]

	# In general this will be a list of multiple configs => need to generalise logic here
	metadata = get_metadata(dataset_name)

	with st.expander("Advanced configuration"):

	dataset_config = st.selectbox("Select a config", [metadata[0]["config"]])

	splits = metadata[0]["splits"]
	split_names = list(splits.values())
	eval_split = splits.get("eval_split", split_names[0])

	selected_split = st.selectbox("Select a split", split_names, index=split_names.index(eval_split))

	# TODO: add a function to handle the mapping task <--> column mapping
	col_mapping = metadata[0]["col_mapping"]
	col_names = list(col_mapping.keys())

	# TODO: figure out how to get all dataset column names (i.e. features) without download dataset itself
	st.markdown("Map your data columns")
	col1, col2 = st.columns(2)

	# TODO: find a better way to layout these items
	# TODO: propagate this information to payload
	with col1:
	st.markdown("`text` column")
	st.text("")
	st.text("")
	st.text("")
	st.text("")
	st.markdown("`target` column")
	with col2:
	st.selectbox("This column should contain the text you want to classify", col_names, index=0)
	st.selectbox("This column should contain the labels you want to assign to the text", col_names, index=1)

	with st.form(key="form"):

	compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)

	selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models, compatible_models[0])

	submit_button = st.form_submit_button("Make submission")

	if submit_button:
	for model in selected_models:
	payload = {
	"username": AUTOTRAIN_USERNAME,
	"task": TASK_TO_ID[metadata[0]["task_id"]],
	"model": model,
	"col_mapping": metadata[0]["col_mapping"],
	"split": selected_split,
	"dataset": original_dataset_name,
	"config": dataset_config,
	}
	json_resp = http_post(
	path="/evaluate/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
	).json()
	if json_resp["status"] == 1:
	st.success(f"✅ Successfully submitted model {model} for evaluation with job ID {json_resp['id']}")
	st.markdown(
	f"""
	Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:

	* 📊 Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
	"""
	)
	else:
	st.error("🙈 Oh noes, there was an error submitting your submission!")