Aaron Mueller committed
Commit 1ab72b5 · Parent: e7e9a2c

testing
Files changed:
- app.py +1 -1
- src/leaderboard/read_evals.py +1 -0
- src/submission/submit.py +6 -9
app.py
CHANGED
@@ -191,7 +191,7 @@ with demo:
                 model_name_textbox,
                 revision_name_textbox,
                 track_name,
-
+                submitted_predictions,
             ],
             submission_result,
         )
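For context: Gradio passes the values of the components in an inputs list to the callback positionally, so this list has to line up with the reworked add_new_eval(model_name, revision, track, predictions) signature in src/submission/submit.py below. A minimal, self-contained sketch of that wiring follows; the component types, labels, and the stub callback are illustrative assumptions, not the actual definitions in app.py.

    import gradio as gr

    def add_new_eval(model_name, revision, track, predictions):
        # Stand-in for src.submission.submit.add_new_eval; the real function
        # validates the request, queues it, and returns a styled status message.
        return f"Queued {model_name} ({revision}) on track {track!r}."

    with gr.Blocks() as demo:
        # Hypothetical component definitions; app.py defines its own elsewhere.
        model_name_textbox = gr.Textbox(label="Model name")
        revision_name_textbox = gr.Textbox(label="Revision")
        track_name = gr.Dropdown(label="Track")
        submitted_predictions = gr.JSON(label="Predictions")
        submission_result = gr.Markdown()
        submit_button = gr.Button("Submit")

        submit_button.click(
            add_new_eval,
            [
                model_name_textbox,
                revision_name_textbox,
                track_name,
                # Fourth input now supplies the predictions object expected
                # by the new add_new_eval signature.
                submitted_predictions,
            ],
            submission_result,
        )

    demo.launch()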
src/leaderboard/read_evals.py
CHANGED
@@ -54,6 +54,7 @@ class EvalResult:
 
     def _get_task_results(task):
         # We average all scores of a given metric (not all metrics are present in all files)
+        print(data["results"].items())
         accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
         if accs.size == 0 or any([acc is None for acc in accs]):
            return None
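The added print is only a debug aid; the line below it is where the metric values are gathered. A small, self-contained sketch of that selection-and-averaging logic, with a hypothetical data["results"] payload and a stand-in Task object (attribute names taken from the comprehension above, values invented for illustration):

    import numpy as np

    # Hypothetical shape of data["results"]: benchmark name -> {metric name: score}.
    data = {
        "results": {
            "example_benchmark": {"acc": 0.71},
            "other_benchmark": {"acc": 0.63},
        }
    }

    class Task:
        # Stand-in with the two attributes the comprehension relies on.
        benchmark = "example_benchmark"
        metric = "acc"

    task = Task()

    # Same selection as the diffed line: take task.metric from the entry whose
    # key equals task.benchmark; a missing metric shows up as None.
    accs = np.array([v.get(task.metric, None)
                     for k, v in data["results"].items()
                     if task.benchmark == k])

    if accs.size == 0 or any(acc is None for acc in accs):
        result = None
    else:
        # The "average all scores" step the comment above refers to.
        result = np.mean(accs)

    print(result)  # 0.71 for this single matching entry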
src/submission/submit.py
CHANGED
@@ -16,9 +16,9 @@ USERS_TO_SUBMISSION_DATES = None
 
 def add_new_eval(
     model_name: str,
-    preds_path: str,
-    track: str,
     revision: str,
+    track: str,
+    predictions: dict,
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
@@ -32,9 +32,6 @@ def add_new_eval(
     model_path = model_name.split("/")[1]
 
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    if preds_path is None or preds_path == "":
-        return styled_error("Please enter a URL where your predictions file can be downloaded.")
 
     if track is None:
         return styled_error("Please select a track.")
@@ -58,9 +55,9 @@ def add_new_eval(
 
     eval_entry = {
         "model_name": model_name,
-        "preds_path": preds_path,
-        "track": track,
         "revision": revision,
+        "track": track,
+        "predictions": predictions,
         "status": "PENDING",
         "submitted_time": current_time,
         "private": False,
@@ -91,5 +88,5 @@ def add_new_eval(
     os.remove(out_path)
 
     return styled_message(
-        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the
+        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the request to show in the PENDING list."
     )
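Net effect of the submit.py changes: callers now pass the parsed predictions object itself rather than a download URL, and the queued request records it under "predictions". A hedged sketch of the request entry that add_new_eval would build with the new signature; all field values here are hypothetical, and only the keys and their order come from the diff above.

    import json
    from datetime import datetime, timezone

    # Hypothetical arguments matching add_new_eval(model_name, revision, track, predictions).
    model_name = "some-user/some-model"
    revision = "main"
    track = "example-track"
    predictions = {"example_task": {"item_1": "label_a"}}

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Same keys, in the same order, as the eval_entry dict built in the diff.
    eval_entry = {
        "model_name": model_name,
        "revision": revision,
        "track": track,
        "predictions": predictions,
        "status": "PENDING",
        "submitted_time": current_time,
        "private": False,
    }

    print(json.dumps(eval_entry, indent=2))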