Merge pull request #40 from huggingface/refactor-model-filter

Changed files: app.py (+106 -105), evaluation.py (+13 -14)
app.py CHANGED

```diff
@@ -436,17 +436,6 @@ with st.form(key="form"):
     )
     print("INFO -- Selected models before filter:", selected_models)
 
-    if len(selected_models) > 0:
-        selected_models = filter_evaluated_models(
-            selected_models,
-            selected_task,
-            selected_dataset,
-            selected_config,
-            selected_split,
-            selected_metrics,
-        )
-        print("INFO -- Selected models after filter:", selected_models)
-
     hf_username = st.text_input("Enter your 🤗 Hub username to be notified when the evaluation is finished")
 
     submit_button = st.form_submit_button("Evaluate models 🚀")
```
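The first hunk drops the eager filtering step. Streamlit reruns the whole script on every widget interaction, so the old call to `filter_evaluated_models` (which queries the Hub) executed on every rerun of the form, not just on submission; the second hunk moves it behind the submit button instead. A minimal sketch of that pattern, with placeholder widgets standing in for the app's real ones:

```python
import streamlit as st

# Sketch only: the form body runs on every rerun of the script; work gated
# behind the submit button runs once per submission.
with st.form(key="form"):
    models = st.multiselect("Models", ["bert-base-uncased", "distilbert-base-uncased"])
    submitted = st.form_submit_button("Evaluate models 🚀")

    if submitted:
        # Expensive filtering (a Hub API call in the real app) belongs here,
        # not in the un-gated form body above.
        st.write("Models to evaluate:", models)
```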
````diff
@@ -454,106 +443,118 @@ with st.form(key="form"):
     if submit_button:
         if len(hf_username) == 0:
             st.warning("No 🤗 Hub username provided! Please enter your username and try again.")
+        elif len(selected_models) == 0:
+            st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
         elif len(selected_models) > 10:
-            st.warning("Only 10 models can be evaluated at once. Please select fewer models …
-        else:
-            project_id = str(uuid.uuid4())[:8]
-            project_payload = {
-                "username": AUTOTRAIN_USERNAME,
-                "proj_name": f"eval-project-{project_id}",
-                "task": TASK_TO_ID[selected_task],
-                "config": {
-                    "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
-                    if selected_task in AUTOTRAIN_TASK_TO_LANG
-                    else "en",
-                    "max_models": 5,
-                    "instance": {
-                        "provider": "aws",
-                        "instance_type": "ml.g4dn.4xlarge",
-                        "max_runtime_seconds": 172800,
-                        "num_instances": 1,
-                        "disk_size_gb": 150,
-                    },
-                    "evaluation": {"metrics": selected_metrics, "models": selected_models, "hf_username": hf_username},
-                },
-            }
-            print(f"INFO -- Payload: {project_payload}")
-            project_json_resp = http_post(
-                path="/projects/create",
-                payload=project_payload,
-                token=HF_TOKEN,
-                domain=AUTOTRAIN_BACKEND_API,
-            ).json()
-            print(f"INFO -- Project creation response: {project_json_resp}")
-
-            if project_json_resp["created"]:
-                data_payload = {
-                    "split": 4, # use "auto" split choice in AutoTrain
-                    "col_mapping": col_mapping,
-                    "load_config": {"max_size_bytes": 0, "shuffle": False},
-                }
-                data_json_resp = http_post(
-                    path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
-                    payload=data_payload,
-                    token=HF_TOKEN,
-                    domain=AUTOTRAIN_BACKEND_API,
-                    params={
-                        "type": "dataset",
-                        "config_name": selected_config,
-                        "split_name": selected_split,
-                    },
-                ).json()
-                print(f"INFO -- Dataset creation response: {data_json_resp}")
-                if data_json_resp["download_status"] == 1:
-                    train_json_resp = http_get(
-                        path=f"/projects/{project_json_resp['id']}/data/start_process",
-                        token=HF_TOKEN,
-                        domain=AUTOTRAIN_BACKEND_API,
-                    ).json()
-                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
-                    if train_json_resp["success"]:
-                        …
+            st.warning("Only 10 models can be evaluated at once. Please select fewer models and try again.")
+        else:
+            # Filter out previously evaluated models
+            selected_models = filter_evaluated_models(
+                selected_models,
+                selected_task,
+                selected_dataset,
+                selected_config,
+                selected_split,
+                selected_metrics,
+            )
+            print("INFO -- Selected models after filter:", selected_models)
+            if len(selected_models) > 0:
+                project_id = str(uuid.uuid4())[:8]
+                project_payload = {
+                    "username": AUTOTRAIN_USERNAME,
+                    "proj_name": f"eval-project-{project_id}",
+                    "task": TASK_TO_ID[selected_task],
+                    "config": {
+                        "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
+                        if selected_task in AUTOTRAIN_TASK_TO_LANG
+                        else "en",
+                        "max_models": 5,
+                        "instance": {
+                            "provider": "aws",
+                            "instance_type": "ml.g4dn.4xlarge",
+                            "max_runtime_seconds": 172800,
+                            "num_instances": 1,
+                            "disk_size_gb": 150,
+                        },
+                        "evaluation": {
+                            "metrics": selected_metrics,
+                            "models": selected_models,
+                            "hf_username": hf_username,
+                        },
+                    },
+                }
+                print(f"INFO -- Payload: {project_payload}")
+                project_json_resp = http_post(
+                    path="/projects/create",
+                    payload=project_payload,
+                    token=HF_TOKEN,
+                    domain=AUTOTRAIN_BACKEND_API,
+                ).json()
+                print(f"INFO -- Project creation response: {project_json_resp}")
+
+                if project_json_resp["created"]:
+                    data_payload = {
+                        "split": 4, # use "auto" split choice in AutoTrain
+                        "col_mapping": col_mapping,
+                        "load_config": {"max_size_bytes": 0, "shuffle": False},
+                    }
+                    data_json_resp = http_post(
+                        path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
+                        payload=data_payload,
+                        token=HF_TOKEN,
+                        domain=AUTOTRAIN_BACKEND_API,
+                        params={
+                            "type": "dataset",
+                            "config_name": selected_config,
+                            "split_name": selected_split,
+                        },
+                    ).json()
+                    print(f"INFO -- Dataset creation response: {data_json_resp}")
+                    if data_json_resp["download_status"] == 1:
+                        train_json_resp = http_get(
+                            path=f"/projects/{project_json_resp['id']}/data/start_process",
+                            token=HF_TOKEN,
+                            domain=AUTOTRAIN_BACKEND_API,
+                        ).json()
+                        print(f"INFO -- AutoTrain job response: {train_json_resp}")
+                        if train_json_resp["success"]:
+                            train_eval_index = {
+                                "train-eval-index": [
+                                    {
+                                        "config": selected_config,
+                                        "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
+                                        "task_id": selected_task,
+                                        "splits": {"eval_split": selected_split},
+                                        "col_mapping": col_mapping,
+                                    }
+                                ]
+                            }
+                            selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
+                            dataset_card_url = get_dataset_card_url(selected_dataset)
+                            st.success("✅ Successfully submitted evaluation job!")
+                            st.markdown(
+                                f"""
+                            Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:
+
+                            * 🔔 A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
+                            * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
+                            * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
+                            """ # noqa
+                            )
+                            st.markdown(
+                                f"""
+                            ```yaml
+                            {selected_metadata}
+                            """
+                            )
+                            print("INFO -- Pushing evaluation job logs to the Hub")
+                            evaluation_log = {}
+                            evaluation_log["payload"] = project_payload
+                            evaluation_log["project_creation_response"] = project_json_resp
+                            evaluation_log["dataset_creation_response"] = data_json_resp
+                            evaluation_log["autotrain_job_response"] = train_json_resp
+                            commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
+                        else:
+                            st.error("🙈 Oh no, there was an error submitting your evaluation job!")
+            else:
+                st.warning("⚠️ No models left to evaluate! Please select other models and try again.")
````
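On success, the app renders the `train-eval-index` block that users can paste into their dataset card to enable 1-click evaluations. A runnable sketch of what `yaml.dump(train_eval_index, sort_keys=False)` produces, with assumed placeholder values for the user's selections:

```python
import yaml

# Placeholder selections (assumed values, for illustration only).
train_eval_index = {
    "train-eval-index": [
        {
            "config": "plain_text",
            "task": "text-classification",
            "task_id": "binary_classification",
            "splits": {"eval_split": "test"},
            "col_mapping": {"text": "text", "label": "target"},
        }
    ]
}
print(yaml.dump(train_eval_index, sort_keys=False))
# train-eval-index:
# - config: plain_text
#   task: text-classification
#   task_id: binary_classification
#   splits:
#     eval_split: test
#   col_mapping:
#     text: text
#     label: target
```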
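The last step records the payload and all three responses via `commit_evaluation_log`, which is also defined outside this diff. One way such a helper could persist the log with `huggingface_hub` (a sketch; the repo id and file layout are assumptions):

```python
import json
import time

from huggingface_hub import HfApi


def commit_evaluation_log(evaluation_log: dict, hf_access_token: str) -> None:
    # Sketch only: upload the log as a timestamped JSON file to a Hub
    # dataset repo. The real helper's destination is not shown in this PR.
    HfApi().upload_file(
        path_or_fileobj=json.dumps(evaluation_log, default=str).encode("utf-8"),
        path_in_repo=f"logs/{int(time.time())}.json",
        repo_id="autoevaluate/evaluation-job-logs",  # assumed repo id
        repo_type="dataset",
        token=hf_access_token,
    )
```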
evaluation.py CHANGED

```diff
@@ -1,3 +1,4 @@
+import copy
 from dataclasses import dataclass
 
 import streamlit as st
@@ -15,30 +16,29 @@ class EvaluationInfo:
     metrics: set
 
 
-def …
+def create_evaluation_info(dataset_info: DatasetInfo) -> int:
     if dataset_info.cardData is not None:
         metadata = dataset_info.cardData["eval_info"]
         metadata.pop("col_mapping", None)
         # TODO(lewtun): populate dataset cards with metric info
         if "metrics" not in metadata:
             metadata["metrics"] = frozenset()
-        …
-        return …
-    else:
-        return None
+        else:
+            metadata["metrics"] = frozenset(metadata["metrics"])
+        return EvaluationInfo(**metadata)
 
 
-def …
+def get_evaluation_infos():
     filt = DatasetFilter(author="autoevaluate")
     evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
-    return [ …
+    return [create_evaluation_info(dset) for dset in evaluation_datasets]
 
 
 def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
-    …
+    evaluation_infos = get_evaluation_infos()
+    models_to_filter = copy.copy(models)
 
-    for …
+    for model in models_to_filter:
         evaluation_info = EvaluationInfo(
             task=task,
             model=model,
@@ -47,12 +47,11 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
             dataset_split=dataset_split,
             metrics=frozenset(metrics),
         )
-        …
-        if candidate_id in evaluation_ids:
+        if evaluation_info in evaluation_infos:
             st.info(
-                f"Model `{model}` has already been evaluated on this configuration. \
+                f"Model [`{model}`](https://huggingface.co/{model}) has already been evaluated on this configuration. \
                 This model will be excluded from the evaluation job..."
             )
-            models.…
+            models.remove(model)
 
     return models
```
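The new `models_to_filter = copy.copy(models)` exists because the loop removes entries from `models` while iterating; iterating the same list you mutate silently skips elements. A minimal demonstration of the bug the shallow copy avoids:

```python
import copy

# Buggy: removing from the list being iterated skips the next element,
# so "b" is never visited and survives.
models = ["a", "b", "c"]
for m in models:
    models.remove(m)
print(models)  # ['b']

# Fixed (the pattern used above): iterate a shallow copy, mutate the original.
models = ["a", "b", "c"]
for m in copy.copy(models):
    models.remove(m)
print(models)  # []
```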
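The membership test `evaluation_info in evaluation_infos` relies on dataclass value equality, and `frozenset` is used for the `metrics` field so that metric order never affects the comparison. A small self-contained sketch (field set abbreviated from the real `EvaluationInfo`):

```python
from dataclasses import dataclass


@dataclass
class EvaluationInfo:
    task: str
    model: str
    metrics: set


done = [EvaluationInfo("nli", "bert-base-uncased", frozenset({"f1", "accuracy"}))]
candidate = EvaluationInfo("nli", "bert-base-uncased", frozenset({"accuracy", "f1"}))
print(candidate in done)  # True: dataclasses compare field-by-field
```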