Implement a basic version of a queuing system
Files changed:
- app.py (+44, -4)
- background_inference.py (+44, -23)
- utils.py (+34, -4)
app.py (CHANGED)

@@ -14,6 +14,8 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
 
+from huggingface_hub import HfApi
+api = HfApi()
 
 st.title("NADI 2024 Leaderboard")
 st.write(
@@ -152,9 +154,36 @@ with tab1:
         """
     )
 
+    # Evaluate the models queued
+    if model_predictions_rows:
+        models_to_be_evaluated = []
+        model_in_progress = []
+
+        for row in model_predictions_rows:
+            if row["status"] == "queued":
+                models_to_be_evaluated.append(row)
+            elif row["status"] == "in_progress":
+                model_in_progress.append(row)
+
+        if model_in_progress == []:
+            for row in models_to_be_evaluated:
+                # Evaluate the model in a background process
+                subprocess.Popen(
+                    [
+                        "python",
+                        "background_inference.py",
+                        row["model_name"],
+                        row["commit_id"],
+                        row["inference_function"],
+                    ]
+                )
+                print(f"Started evaluation of {row['model_name']}.")
 
 with tab2:
     model_name = st.text_input("Enter a model's name on HF")
+    model_revision = st.text_input(
+        "Enter a model's revision on HF (commit id, or branch name)", placeholder="main"
+    )
     inference_functions_names = [
         func_name for func_name, _ in getmembers(eval_utils, isfunction)
     ]
@@ -163,6 +192,12 @@ with tab2:
         inference_functions_names,
     )
 
+    if model_name and model_revision:
+        # Get the model's commit id
+        commit_id = api.list_repo_commits(model_name, revision=model_revision)[
+            0
+        ].commit_id
+
     # Show the docstring of the inference functions
     inference_functions_docstring = [
         getattr(eval_utils, func).__doc__ for func in inference_functions_names
@@ -181,8 +216,13 @@ with tab2:
         "Note: We are happy to discuss adding new custom inference methods for your models."
     )
 
+    if model_name and model_revision and inference_function:
+        # Add the model to the evaluation queue
+        utils.update_model_queue(
+            repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
+            model_name=model_name,
+            commit_id=commit_id,
+            inference_function=inference_function,
+            status="queued",
+        )
+        st.info(f"The evaluation of the model {model_name} is queued for processing.")
background_inference.py (CHANGED)

@@ -7,33 +7,54 @@ from constants import DIALECTS_WITH_LABELS
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 model_name = sys.argv[1]
+commit_id = sys.argv[2]
+inference_function = sys.argv[3]
 
+utils.update_model_queue(
+    repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
+    model_name=model_name,
+    commit_id=commit_id,
+    inference_function=inference_function,
+    status="in_progress",
+)
 
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, revision=commit_id)
 
+    # Load the dataset
+    dataset_name = os.environ["DATASET_NAME"]
+    dataset = datasets.load_dataset(dataset_name)["test"]
 
+    sentences = dataset["sentence"]
+    labels = {dialect: dataset[dialect] for dialect in DIALECTS_WITH_LABELS}
+
+    predictions = []
+    for i, sentence in enumerate(sentences):
+        predictions.append(
+            getattr(eval_utils, inference_function)(model, tokenizer, sentence)
+        )
+        print(
+            f"Inference progress ({model_name}, {inference_function}): {round(100 * (i + 1) / len(sentences), 1)}%"
+        )
+
+    # Store the predictions in a private dataset
+    utils.upload_predictions(
+        os.environ["PREDICTIONS_DATASET_NAME"],
+        predictions,
+        model_name,
+        commit_id,
+        inference_function,
+    )
 
+    print("Inference completed!")
 
+except Exception as e:
+    print(f"An error occurred during inference of {model_name}: {e}")
+    utils.update_model_queue(
+        repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
+        model_name=model_name,
+        commit_id=commit_id,
+        inference_function=inference_function,
+        status="failed (online)",
+    )
utils.py (CHANGED)

@@ -8,19 +8,49 @@ def current_seconds_time():
     return round(time.time())
 
 
+def form_file_name(model_name, commit_id, inference_function):
+    return f"predictions_{re.sub('/', '_', model_name)}_{commit_id}_{inference_function}.json"
+
+
+def update_model_queue(repo_id, model_name, commit_id, inference_function, status):
+    assert status in ["queued", "in_progress", "failed (online)"]
     api = HfApi()
 
     timestamp = current_seconds_time()
+    predictions_filename = form_file_name(model_name, commit_id, inference_function)
+
+    predictions_object = {
+        "model_name": model_name,
+        "commit_id": commit_id,
+        "inference_function": inference_function,
+        "last_updated_timestamp": timestamp,
+        "status": status,
+    }
+
+    with open(predictions_filename, "w") as f:
+        json.dump(predictions_object, f)
+
+    future = api.upload_file(
+        path_or_fileobj=predictions_filename,
+        path_in_repo=predictions_filename,
+        repo_id=repo_id,
+        repo_type="dataset",
+        run_as_future=True,
     )
 
+
+def upload_predictions(repo_id, predictions, model_name, commit_id, inference_function):
+    api = HfApi()
+
+    timestamp = current_seconds_time()
+    predictions_filename = form_file_name(model_name, commit_id, inference_function)
+
     predictions_object = {
         "model_name": model_name,
         "predictions": predictions,
         "inference_function": inference_function,
+        "last_updated_timestamp": timestamp,
+        "status": "completed",
     }
 
     with open(predictions_filename, "w") as f: