some changes

- app.py +5 -3
- src/about.py +2 -2
- src/display/utils.py +2 -2
- src/populate.py +7 -4
- src/submission/submit.py +38 -25
app.py
CHANGED

@@ -58,8 +58,8 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
+    # if dataframe is None or dataframe.empty:
+    #     raise ValueError("Leaderboard DataFrame is empty or None.")
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],

@@ -172,6 +172,7 @@ with demo:
                 interactive=True,
             )
             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            ans_file = gr.File(label="Arena Hard Answer File", file_types=[".json"])
 
             submit_button = gr.Button("Submit Eval")
             submission_result = gr.Markdown()

@@ -184,6 +185,7 @@ with demo:
                 precision,
                 weight_type,
                 model_type,
+                ans_file
             ],
             submission_result,
         )

@@ -199,6 +201,6 @@ with demo:
 )
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
+# scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch()
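
Review note: this file gains an `ans_file` upload that is forwarded to `add_new_eval`, while the empty-dataframe guard in `init_leaderboard` and the `restart_space` job (which previously restarted the Space every 1800 seconds) are commented out. A minimal, self-contained sketch of the Gradio pattern the new input relies on: on recent Gradio versions, `gr.File` with `type="filepath"` hands the handler the uploaded file's temporary path as a plain str. The handler body here is illustrative, not code from this Space:

import gradio as gr

def handle_submit(answer_file):
    # With type="filepath", gr.File passes the uploaded file's temporary
    # path as a plain str (or None if nothing was uploaded), which is why
    # add_new_eval can treat ans_file as a readable path.
    if answer_file is None:
        return "No answer file uploaded."
    with open(answer_file) as f:
        payload = f.read()
    return f"Received {len(payload)} bytes of answers."

with gr.Blocks() as demo:
    ans_file = gr.File(label="Arena Hard Answer File", file_types=[".json"], type="filepath")
    submit_button = gr.Button("Submit Eval")
    submission_result = gr.Markdown()
    submit_button.click(handle_submit, [ans_file], submission_result)

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=40).launch()
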
src/about.py
CHANGED

@@ -11,8 +11,8 @@ class Task:
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    task0 = Task("arenahard", "score", "score")
     task1 = Task("logiqa", "acc_norm", "LogiQA")
 
 NUM_FEWSHOT = 0 # Change with your few shot
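
For context, the replaced `task0` entry rewires the leaderboard's first benchmark to the `arenahard` score. A sketch of how such an entry is typically consumed downstream; the `Task` field names are assumptions inferred from the comment and constructor calls above:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results json
    metric: str     # metric_key in the results json
    col_name: str   # name to display in the leaderboard

class Tasks(Enum):
    task0 = Task("arenahard", "score", "score")
    task1 = Task("logiqa", "acc_norm", "LogiQA")

# Display columns are usually derived by iterating the enum:
BENCHMARK_COLS = [task.value.col_name for task in Tasks]
print(BENCHMARK_COLS)  # ['score', 'LogiQA']
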
src/display/utils.py
CHANGED

@@ -12,7 +12,7 @@ def fields(raw_class):
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
 # when a modif is needed
-@dataclass
+@dataclass(frozen=True)
 class ColumnContent:
     name: str
     type: str

@@ -23,7 +23,7 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
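
The `frozen=True` addition likely targets Python 3.11+, where dataclasses reject any field default whose class is unhashable; a non-frozen dataclass (with `eq=True`) sets `__hash__ = None`, so `ColumnContent` instances would no longer be valid defaults when the column list is turned into a dataclass. A minimal reproduction, assuming the template builds `AutoEvalColumn` with `make_dataclass` roughly as below (the extra boolean fields are assumptions based on the constructor calls in the diff):

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)  # frozen keeps instances hashable, so they are valid defaults
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

# Each entry is (field_name, field_type, default_value).
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]

# On Python 3.11+ this raises ValueError if the defaults are unhashable,
# which is exactly what dropping frozen=True would cause.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.model.name)  # Model
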
src/populate.py
CHANGED

@@ -13,12 +13,15 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
 
-    df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df[cols].round(decimals=2)
+    df = pd.DataFrame.from_records(all_data_json,columns=cols)
+    df['model']="nothing"
+    # df.columns = cols
+    # df.iloc[0]= create dummy
+    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    # df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
-    df = df[has_no_nan_values(df, benchmark_cols)]
+    # df = df[has_no_nan_values(df, benchmark_cols)]
     return df
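
The rewritten function now short-circuits most of the pipeline: it builds the frame directly with the expected columns and stamps a placeholder model name, with sorting, rounding, and NaN filtering commented out. A quick check of what that returns when there are no results yet (the column names below are placeholders):

import pandas as pd

cols = ["model", "Average ⬆️", "score", "LogiQA"]  # placeholder column list

# With an empty record list, from_records still honors the explicit
# columns, producing a zero-row frame with the full schema.
df = pd.DataFrame.from_records([], columns=cols)
df["model"] = "nothing"  # assigns to zero rows, so the frame stays empty

print(df.shape)          # (0, 4)
print(list(df.columns))  # ['model', 'Average ⬆️', 'score', 'LogiQA']
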
src/submission/submit.py
CHANGED

@@ -6,9 +6,9 @@ from src.display.formatting import styled_error, styled_message, styled_warning
 from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
 from src.submission.check_validity import (
     already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
+    # check_model_card,
+    # get_model_size,
+    # is_model_on_hub,
 )
 
 REQUESTED_MODELS = None

@@ -21,6 +21,7 @@ def add_new_eval(
     precision: str,
     weight_type: str,
     model_type: str,
+    ans_file: str,
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES

@@ -44,33 +45,33 @@
     revision = "main"
 
     # Is the model on the hub?
-    if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not base_model_on_hub:
-            return styled_error(f'Base model "{base_model}" {error}')
+    # if weight_type in ["Delta", "Adapter"]:
+    #     base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+    #     if not base_model_on_hub:
+    #         return styled_error(f'Base model "{base_model}" {error}')
 
-    if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not model_on_hub:
-            return styled_error(f'Model "{model}" {error}')
+    # if not weight_type == "Adapter":
+    #     model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+    #     if not model_on_hub:
+    #         return styled_error(f'Model "{model}" {error}')
 
     # Is the model info correctly filled?
-    try:
-        model_info = API.model_info(repo_id=model, revision=revision)
-    except Exception:
-        return styled_error("Could not get your model information. Please fill it up properly.")
+    # try:
+    #     model_info = API.model_info(repo_id=model, revision=revision)
+    # except Exception:
+    #     return styled_error("Could not get your model information. Please fill it up properly.")
 
-    model_size = get_model_size(model_info=model_info, precision=precision)
+    # model_size = get_model_size(model_info=model_info, precision=precision)
 
     # Were the model card and license filled?
-    try:
-        license = model_info.cardData["license"]
-    except Exception:
-        return styled_error("Please select a license for your model")
+    # try:
+    #     license = model_info.cardData["license"]
+    # except Exception:
+    #     return styled_error("Please select a license for your model")
 
-    modelcard_OK, error_msg = check_model_card(model)
-    if not modelcard_OK:
-        return styled_error(error_msg)
+    # modelcard_OK, error_msg = check_model_card(model)
+    # if not modelcard_OK:
+    #     return styled_error(error_msg)
 
     # Seems good, creating the eval
     print("Adding new eval")

@@ -84,8 +85,8 @@
         "status": "PENDING",
         "submitted_time": current_time,
         "model_type": model_type,
-        "likes":
-        "params":
+        "likes": "",
+        "params": "",
         "license": license,
         "private": False,
     }

@@ -98,10 +99,14 @@
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
+    out_path_upload = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}_toeval.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
 
+    with open(out_path_upload, "w") as f:
+        f.write(open(ans_file).read())
+
     print("Uploading eval file")
     API.upload_file(
         path_or_fileobj=out_path,

@@ -110,9 +115,17 @@
         repo_type="dataset",
         commit_message=f"Add {model} to eval queue",
     )
+    API.upload_file(
+        path_or_fileobj=out_path_upload,
+        path_in_repo=out_path_upload.split("eval-queue/")[1],
+        repo_id=QUEUE_REPO,
+        repo_type="dataset",
+        commit_message=f"Add {model} to eval queue",
+    )
 
     # Remove the local file
     os.remove(out_path)
+    os.remove(out_path_upload)
 
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
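
Review note on the new upload path: `out_path_upload.split("eval-queue/")[1]` silently assumes `EVAL_REQUESTS_PATH` contains the literal segment `eval-queue/` and raises `IndexError` otherwise, and `open(ans_file).read()` copies the upload without closing the handle promptly. A hedged sketch of a more defensive version of just this step; the repo id and local directory are placeholders, not the Space's real values:

import os
import shutil
from pathlib import Path

from huggingface_hub import HfApi

API = HfApi()
EVAL_REQUESTS_PATH = "eval-queue"   # placeholder local clone directory
QUEUE_REPO = "my-org/requests"      # placeholder dataset repo id

def upload_answer_file(ans_file: str, user_name: str, model_path: str,
                       precision: str, weight_type: str) -> None:
    out_dir = Path(EVAL_REQUESTS_PATH) / user_name
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path_upload = out_dir / (
        f"{model_path}_eval_request_False_{precision}_{weight_type}_toeval.json"
    )

    # Copy the uploaded answers instead of open(...).read(), which leaks
    # a file handle and pulls the whole file into memory.
    shutil.copyfile(ans_file, out_path_upload)

    API.upload_file(
        path_or_fileobj=str(out_path_upload),
        # Derive the in-repo path relative to the local clone rather than
        # string-splitting on "eval-queue/".
        path_in_repo=str(out_path_upload.relative_to(EVAL_REQUESTS_PATH)),
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        commit_message=f"Add {model_path} answers to eval queue",
    )
    os.remove(out_path_upload)
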