lewtun (HF staff) committed on
Commit d658c8a • 1 Parent(s): 1394a88
Files changed (2)
  1. app.py +94 -93
  2. evaluation.py +1 -1
app.py CHANGED
@@ -446,9 +446,9 @@ with st.form(key="form"):
         elif len(selected_models) == 0:
             st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
         elif len(selected_models) > 10:
-            st.warning("Only 10 models can be evaluated at once. Please select fewer models to evaluate.")
+            st.warning("Only 10 models can be evaluated at once. Please select fewer models and try again.")
         else:
-            # Filter out previsouly evaluated models
+            # Filter out previously evaluated models
             selected_models = filter_evaluated_models(
                 selected_models,
                 selected_task,
@@ -458,102 +458,103 @@ with st.form(key="form"):
                 selected_metrics,
             )
             print("INFO -- Selected models after filter:", selected_models)
-
-            project_id = str(uuid.uuid4())[:8]
-            project_payload = {
-                "username": AUTOTRAIN_USERNAME,
-                "proj_name": f"eval-project-{project_id}",
-                "task": TASK_TO_ID[selected_task],
-                "config": {
-                    "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
-                    if selected_task in AUTOTRAIN_TASK_TO_LANG
-                    else "en",
-                    "max_models": 5,
-                    "instance": {
-                        "provider": "aws",
-                        "instance_type": "ml.g4dn.4xlarge",
-                        "max_runtime_seconds": 172800,
-                        "num_instances": 1,
-                        "disk_size_gb": 150,
-                    },
-                    "evaluation": {"metrics": selected_metrics, "models": selected_models, "hf_username": hf_username},
-                },
-            }
-            print(f"INFO -- Payload: {project_payload}")
-            project_json_resp = http_post(
-                path="/projects/create",
-                payload=project_payload,
-                token=HF_TOKEN,
-                domain=AUTOTRAIN_BACKEND_API,
-            ).json()
-            print(f"INFO -- Project creation response: {project_json_resp}")
-
-            if project_json_resp["created"]:
-                data_payload = {
-                    "split": 4,  # use "auto" split choice in AutoTrain
-                    "col_mapping": col_mapping,
-                    "load_config": {"max_size_bytes": 0, "shuffle": False},
-                }
-                data_json_resp = http_post(
-                    path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
-                    payload=data_payload,
-                    token=HF_TOKEN,
-                    domain=AUTOTRAIN_BACKEND_API,
-                    params={
-                        "type": "dataset",
-                        "config_name": selected_config,
-                        "split_name": selected_split,
-                    },
-                ).json()
-                print(f"INFO -- Dataset creation response: {data_json_resp}")
-                if data_json_resp["download_status"] == 1:
-                    train_json_resp = http_get(
-                        path=f"/projects/{project_json_resp['id']}/data/start_process",
-                        token=HF_TOKEN,
-                        domain=AUTOTRAIN_BACKEND_API,
-                    ).json()
-                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
-                    if train_json_resp["success"]:
-                        train_eval_index = {
-                            "train-eval-index": [
-                                {
-                                    "config": selected_config,
-                                    "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
-                                    "task_id": selected_task,
-                                    "splits": {"eval_split": selected_split},
-                                    "col_mapping": col_mapping,
-                                }
-                            ]
-                        }
-                        selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
-                        dataset_card_url = get_dataset_card_url(selected_dataset)
-                        st.success("✅ Successfully submitted evaluation job!")
-                        st.markdown(
-                            f"""
-                            Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
-
-                            * 🔔 A \
-                                [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions)\
-                                with the evaluation results will be opened for each model you selected. \
-                                Check your email for notifications.
-                            * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
-                                to view the results from your submission once the Hub pull request is merged.
-                            * 🥱 Tired of configuring evaluations? Add the following metadata to the \
-                                [dataset card]({dataset_card_url}) to enable 1-click evaluations:
-                            """
-                        )
-                        st.markdown(
-                            f"""
-                            ```yaml
-                            {selected_metadata}
-                            """
-                        )
-                        print("INFO -- Pushing evaluation job logs to the Hub")
-                        evaluation_log = {}
-                        evaluation_log["payload"] = project_payload
-                        evaluation_log["project_creation_response"] = project_json_resp
-                        evaluation_log["dataset_creation_response"] = data_json_resp
-                        evaluation_log["autotrain_job_response"] = train_json_resp
-                        commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
-                    else:
-                        st.error("🙈 Oh no, there was an error submitting your evaluation job!")
+            if len(selected_models) > 0:
+                project_id = str(uuid.uuid4())[:8]
+                project_payload = {
+                    "username": AUTOTRAIN_USERNAME,
+                    "proj_name": f"eval-project-{project_id}",
+                    "task": TASK_TO_ID[selected_task],
+                    "config": {
+                        "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
+                        if selected_task in AUTOTRAIN_TASK_TO_LANG
+                        else "en",
+                        "max_models": 5,
+                        "instance": {
+                            "provider": "aws",
+                            "instance_type": "ml.g4dn.4xlarge",
+                            "max_runtime_seconds": 172800,
+                            "num_instances": 1,
+                            "disk_size_gb": 150,
+                        },
+                        "evaluation": {
+                            "metrics": selected_metrics,
+                            "models": selected_models,
+                            "hf_username": hf_username,
+                        },
+                    },
+                }
+                print(f"INFO -- Payload: {project_payload}")
+                project_json_resp = http_post(
+                    path="/projects/create",
+                    payload=project_payload,
+                    token=HF_TOKEN,
+                    domain=AUTOTRAIN_BACKEND_API,
+                ).json()
+                print(f"INFO -- Project creation response: {project_json_resp}")
+
+                if project_json_resp["created"]:
+                    data_payload = {
+                        "split": 4,  # use "auto" split choice in AutoTrain
+                        "col_mapping": col_mapping,
+                        "load_config": {"max_size_bytes": 0, "shuffle": False},
+                    }
+                    data_json_resp = http_post(
+                        path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
+                        payload=data_payload,
+                        token=HF_TOKEN,
+                        domain=AUTOTRAIN_BACKEND_API,
+                        params={
+                            "type": "dataset",
+                            "config_name": selected_config,
+                            "split_name": selected_split,
+                        },
+                    ).json()
+                    print(f"INFO -- Dataset creation response: {data_json_resp}")
+                    if data_json_resp["download_status"] == 1:
+                        train_json_resp = http_get(
+                            path=f"/projects/{project_json_resp['id']}/data/start_process",
+                            token=HF_TOKEN,
+                            domain=AUTOTRAIN_BACKEND_API,
+                        ).json()
+                        print(f"INFO -- AutoTrain job response: {train_json_resp}")
+                        if train_json_resp["success"]:
+                            train_eval_index = {
+                                "train-eval-index": [
+                                    {
+                                        "config": selected_config,
+                                        "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
+                                        "task_id": selected_task,
+                                        "splits": {"eval_split": selected_split},
+                                        "col_mapping": col_mapping,
+                                    }
+                                ]
+                            }
+                            selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
+                            dataset_card_url = get_dataset_card_url(selected_dataset)
+                            st.success("✅ Successfully submitted evaluation job!")
+                            st.markdown(
+                                f"""
+                                Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
+
+                                * 🔔 A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
+                                * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
+                                * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
+                                """  # noqa
+                            )
+                            st.markdown(
+                                f"""
+                                ```yaml
+                                {selected_metadata}
+                                """
+                            )
+                            print("INFO -- Pushing evaluation job logs to the Hub")
+                            evaluation_log = {}
+                            evaluation_log["payload"] = project_payload
+                            evaluation_log["project_creation_response"] = project_json_resp
+                            evaluation_log["dataset_creation_response"] = data_json_resp
+                            evaluation_log["autotrain_job_response"] = train_json_resp
+                            commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
+                        else:
+                            st.error("🙈 Oh no, there was an error submitting your evaluation job!")
+            else:
+                st.warning("⚠️ No models left to evaluate! Please select other models and try again.")
evaluation.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import DatasetFilter, HfApi
 from huggingface_hub.hf_api import DatasetInfo
 
 
-@dataclass(frozen=True, eq=True, unsafe_hash=True)
+@dataclass(frozen=True, eq=True)
 class EvaluationInfo:
     task: str
     model: str
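On the `evaluation.py` change: dropping `unsafe_hash=True` should not change behaviour, since a dataclass declared with `frozen=True` and `eq=True` already gets an auto-generated `__hash__`. A minimal standalone sketch (simplified fields, not the full `EvaluationInfo` from this repo) illustrating that instances stay hashable:

```python
from dataclasses import dataclass


# Sketch only: frozen=True + eq=True is enough for hashability.
@dataclass(frozen=True, eq=True)
class EvaluationInfo:
    task: str
    model: str


a = EvaluationInfo(task="binary_classification", model="distilbert-base-uncased")
b = EvaluationInfo(task="binary_classification", model="distilbert-base-uncased")

assert a == b              # eq=True generates __eq__
assert hash(a) == hash(b)  # frozen=True + eq=True generates __hash__
assert len({a, b}) == 1    # duplicates collapse in a set
```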