Spaces:

AMR-KELEG
/

MLADI

Running

App Files Files Community

AMR-KELEG committed on Jan 22

Commit

c147e35

1 Parent(s): 84916fc

Implement a basic version of a queuing system

Browse files

Files changed (3) hide show

app.py +44 -4
background_inference.py +44 -23
utils.py +34 -4

app.py CHANGED Viewed

@@ -14,6 +14,8 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
 st.title("NADI 2024 Leaderboard")
 st.write(
@@ -152,9 +154,36 @@ with tab1:
                 """
             )
 with tab2:
     model_name = st.text_input("Enter a model's name on HF")
     inference_functions_names = [
         func_name for func_name, _ in getmembers(eval_utils, isfunction)
     ]
@@ -163,6 +192,12 @@ with tab2:
         inference_functions_names,
     )
     # Show the docstring of the inference functions
     inference_functions_docstring = [
         getattr(eval_utils, func).__doc__ for func in inference_functions_names
@@ -181,8 +216,13 @@ with tab2:
             "Note: We are happy to discuss adding new custom inference methods for your models."
         )
-    if model_name:
-        subprocess.Popen(
-            ["python", "background_inference.py", model_name, inference_function]
         )
-        st.info(f"Your evaluation request is being processed.")

 import pandas as pd
 from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
+from huggingface_hub import HfApi
+api = HfApi()
 st.title("NADI 2024 Leaderboard")
 st.write(
                 """
             )
+    # Evaluate the models queued
+    if model_predictions_rows:
+        models_to_be_evaluated = []
+        model_in_progress = []
+        for row in model_predictions_rows:
+            if row["status"] == "queued":
+                models_to_be_evaluated.append(row)
+            elif row["status"] == "in_progress":
+                model_in_progress.append(row)
+        if model_in_progress == []:
+            for row in models_to_be_evaluated:
+                # Evaluate the model
+                subprocess.Popen(
+                    [
+                        "python",
+                        "background_inference.py",
+                        row["model_name"],
+                        row["commit_id"],
+                        row["inference_function"],
+                    ]
+                )
+                print(f"Started evaluating of {row['model_name']}.")
 with tab2:
     model_name = st.text_input("Enter a model's name on HF")
+    model_revision = st.text_input(
+        "Enter a model's revision on HF (commit id, or branch name)", placeholder="main"
+    )
     inference_functions_names = [
         func_name for func_name, _ in getmembers(eval_utils, isfunction)
     ]
         inference_functions_names,
     )
+    if model_name and model_revision:
+        # Get the model's commit id
+        commit_id = api.list_repo_commits(model_name, revision=model_revision)[
+            0
+        ].commit_id
     # Show the docstring of the inference functions
     inference_functions_docstring = [
         getattr(eval_utils, func).__doc__ for func in inference_functions_names
             "Note: We are happy to discuss adding new custom inference methods for your models."
         )
+    if model_name and model_name and inference_function:
+        # Add the model to the evaluation queue
+        utils.update_model_queue(
+            repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
+            model_name=model_name,
+            commit_id=commit_id,
+            inference_function=inference_function,
+            status="queued",
         )
+        st.info(f"The evaluation of the model {model_name} is queued for processing.")

background_inference.py CHANGED Viewed

@@ -7,33 +7,54 @@ from constants import DIALECTS_WITH_LABELS
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 model_name = sys.argv[1]
-inference_function = sys.argv[2]
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-# Load the dataset
-dataset_name = os.environ["DATASET_NAME"]
-dataset = datasets.load_dataset(dataset_name)["test"]
-sentences = dataset["sentence"]
-labels = {dialect: dataset[dialect] for dialect in DIALECTS_WITH_LABELS}
-predictions = []
-for i, sentence in enumerate(sentences):
-    predictions.append(
-        getattr(eval_utils, inference_function)(model, tokenizer, sentence)
-    )
-    print(
-        f"Inference progress ({model_name}, {inference_function}): {round(100 * (i + 1) / len(sentences), 1)}%"
     )
-# Store the predictions in a private dataset
-utils.upload_predictions(
-    os.environ["PREDICTIONS_DATASET_NAME"],
-    predictions,
-    model_name,
-    inference_function,
-)
-print(f"Inference completed!")

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 model_name = sys.argv[1]
+commit_id = sys.argv[2]
+inference_function = sys.argv[3]
+utils.update_model_queue(
+    repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
+    model_name=model_name,
+    commit_id=commit_id,
+    inference_function=inference_function,
+    status="in_progress",
+)
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, revision=commit_id)
+    # Load the dataset
+    dataset_name = os.environ["DATASET_NAME"]
+    dataset = datasets.load_dataset(dataset_name)["test"]
+    sentences = dataset["sentence"]
+    labels = {dialect: dataset[dialect] for dialect in DIALECTS_WITH_LABELS}
+    predictions = []
+    for i, sentence in enumerate(sentences):
+        predictions.append(
+            getattr(eval_utils, inference_function)(model, tokenizer, sentence)
+        )
+        print(
+            f"Inference progress ({model_name}, {inference_function}): {round(100 * (i + 1) / len(sentences), 1)}%"
+        )
+    # Store the predictions in a private dataset
+    utils.upload_predictions(
+        os.environ["PREDICTIONS_DATASET_NAME"],
+        predictions,
+        model_name,
+        commit_id,
+        inference_function,
     )
+    print(f"Inference completed!")
+except Exception as e:
+    print(f"An error occurred during inference of {model_name}: {e}")
+    utils.update_model_queue(
+        repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
+        model_name=model_name,
+        commit_id=commit_id,
+        inference_function=inference_function,
+        status="failed (online)",
+    )

utils.py CHANGED Viewed

@@ -8,19 +8,49 @@ def current_seconds_time():
     return round(time.time())
-def upload_predictions(repo_id, predictions, model_name, inference_function):
     api = HfApi()
     timestamp = current_seconds_time()
-    predictions_filename = (
-        f"predictions_{timestamp}_{re.sub('/', '_', model_name)}.json"
     )
     predictions_object = {
         "model_name": model_name,
         "predictions": predictions,
-        "timestamp": timestamp,
         "inference_function": inference_function,
     }
     with open(predictions_filename, "w") as f:

     return round(time.time())
+def form_file_name(model_name, commit_id, inference_function):
+    return f"predictions_{re.sub('/', '_', model_name)}_{commit_id}_{inference_function}.json"
+def update_model_queue(repo_id, model_name, commit_id, inference_function, status):
+    assert status in ["queued", "in_progress", "failed (online)"]
     api = HfApi()
     timestamp = current_seconds_time()
+    predictions_filename = form_file_name(model_name, commit_id, inference_function)
+    predictions_object = {
+        "model_name": model_name,
+        "commit_id": commit_id,
+        "inference_function": inference_function,
+        "last_updated_timestamp": timestamp,
+        "status": status,
+    }
+    with open(predictions_filename, "w") as f:
+        json.dump(predictions_object, f)
+    future = api.upload_file(
+        path_or_fileobj=predictions_filename,
+        path_in_repo=predictions_filename,
+        repo_id=repo_id,
+        repo_type="dataset",
+        run_as_future=True,
     )
+def upload_predictions(repo_id, predictions, model_name, commit_id, inference_function):
+    api = HfApi()
+    timestamp = current_seconds_time()
+    predictions_filename = form_file_name(model_name, commit_id, inference_function)
     predictions_object = {
         "model_name": model_name,
         "predictions": predictions,
         "inference_function": inference_function,
+        "last_updated_timestamp": timestamp,
+        "status": "completed",
     }
     with open(predictions_filename, "w") as f: