AMR-KELEG committed on
Commit
c147e35
·
1 Parent(s): 84916fc

Implement a basic version of a queuing system

Browse files
Files changed (3) hide show
  1. app.py +44 -4
  2. background_inference.py +44 -23
  3. utils.py +34 -4
app.py CHANGED
@@ -14,6 +14,8 @@ import numpy as np
14
  import pandas as pd
15
  from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
16
 
 
 
17
 
18
  st.title("NADI 2024 Leaderboard")
19
  st.write(
@@ -152,9 +154,36 @@ with tab1:
152
  """
153
  )
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
  with tab2:
157
  model_name = st.text_input("Enter a model's name on HF")
 
 
 
158
  inference_functions_names = [
159
  func_name for func_name, _ in getmembers(eval_utils, isfunction)
160
  ]
@@ -163,6 +192,12 @@ with tab2:
163
  inference_functions_names,
164
  )
165
 
 
 
 
 
 
 
166
  # Show the docstring of the inference functions
167
  inference_functions_docstring = [
168
  getattr(eval_utils, func).__doc__ for func in inference_functions_names
@@ -181,8 +216,13 @@ with tab2:
181
  "Note: We are happy to discuss adding new custom inference methods for your models."
182
  )
183
 
184
- if model_name:
185
- subprocess.Popen(
186
- ["python", "background_inference.py", model_name, inference_function]
 
 
 
 
 
187
  )
188
- st.info(f"Your evaluation request is being processed.")
 
14
  import pandas as pd
15
  from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
16
 
17
+ from huggingface_hub import HfApi
18
+ api = HfApi()
19
 
20
  st.title("NADI 2024 Leaderboard")
21
  st.write(
 
154
  """
155
  )
156
 
157
+ # Evaluate the models queued
158
+ if model_predictions_rows:
159
+ models_to_be_evaluated = []
160
+ model_in_progress = []
161
+
162
+ for row in model_predictions_rows:
163
+ if row["status"] == "queued":
164
+ models_to_be_evaluated.append(row)
165
+ elif row["status"] == "in_progress":
166
+ model_in_progress.append(row)
167
+
168
+ if model_in_progress == []:
169
+ for row in models_to_be_evaluated:
170
+ # Evaluate the model
171
+ subprocess.Popen(
172
+ [
173
+ "python",
174
+ "background_inference.py",
175
+ row["model_name"],
176
+ row["commit_id"],
177
+ row["inference_function"],
178
+ ]
179
+ )
180
+ print(f"Started evaluating of {row['model_name']}.")
181
 
182
  with tab2:
183
  model_name = st.text_input("Enter a model's name on HF")
184
+ model_revision = st.text_input(
185
+ "Enter a model's revision on HF (commit id, or branch name)", placeholder="main"
186
+ )
187
  inference_functions_names = [
188
  func_name for func_name, _ in getmembers(eval_utils, isfunction)
189
  ]
 
192
  inference_functions_names,
193
  )
194
 
195
+ if model_name and model_revision:
196
+ # Get the model's commit id
197
+ commit_id = api.list_repo_commits(model_name, revision=model_revision)[
198
+ 0
199
+ ].commit_id
200
+
201
  # Show the docstring of the inference functions
202
  inference_functions_docstring = [
203
  getattr(eval_utils, func).__doc__ for func in inference_functions_names
 
216
  "Note: We are happy to discuss adding new custom inference methods for your models."
217
  )
218
 
219
+ if model_name and model_name and inference_function:
220
+ # Add the model to the evaluation queue
221
+ utils.update_model_queue(
222
+ repo_id=os.environ["PREDICTIONS_DATASET_NAME"],
223
+ model_name=model_name,
224
+ commit_id=commit_id,
225
+ inference_function=inference_function,
226
+ status="queued",
227
  )
228
+ st.info(f"The evaluation of the model {model_name} is queued for processing.")
background_inference.py CHANGED
@@ -7,33 +7,54 @@ from constants import DIALECTS_WITH_LABELS
7
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
 
9
  model_name = sys.argv[1]
10
- inference_function = sys.argv[2]
 
11
 
12
- tokenizer = AutoTokenizer.from_pretrained(model_name)
13
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
 
 
14
 
15
- # Load the dataset
16
- dataset_name = os.environ["DATASET_NAME"]
17
- dataset = datasets.load_dataset(dataset_name)["test"]
18
 
19
- sentences = dataset["sentence"]
20
- labels = {dialect: dataset[dialect] for dialect in DIALECTS_WITH_LABELS}
 
21
 
22
- predictions = []
23
- for i, sentence in enumerate(sentences):
24
- predictions.append(
25
- getattr(eval_utils, inference_function)(model, tokenizer, sentence)
26
- )
27
- print(
28
- f"Inference progress ({model_name}, {inference_function}): {round(100 * (i + 1) / len(sentences), 1)}%"
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
 
31
- # Store the predictions in a private dataset
32
- utils.upload_predictions(
33
- os.environ["PREDICTIONS_DATASET_NAME"],
34
- predictions,
35
- model_name,
36
- inference_function,
37
- )
38
 
39
- print(f"Inference completed!")
 
 
 
 
 
 
 
 
 
7
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
 
# Command-line arguments: the model's name, the commit id of the model to
# evaluate, and the name of the inference function to look up in eval_utils.
model_name = sys.argv[1]
commit_id = sys.argv[2]
inference_function = sys.argv[3]

predictions_repo_id = os.environ["PREDICTIONS_DATASET_NAME"]

# Flag the request as running before any model or dataset loading starts.
utils.update_model_queue(
    repo_id=predictions_repo_id,
    model_name=model_name,
    commit_id=commit_id,
    inference_function=inference_function,
    status="in_progress",
)

try:
    # Load the tokenizer and the model at the requested revision.
    tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, revision=commit_id
    )

    # Load the test split of the dataset
    dataset_name = os.environ["DATASET_NAME"]
    dataset = datasets.load_dataset(dataset_name)["test"]
    sentences = dataset["sentence"]

    # Resolve the inference function and the sentence count once, not once
    # per sentence.
    predict = getattr(eval_utils, inference_function)
    n_sentences = len(sentences)

    predictions = []
    for i, sentence in enumerate(sentences):
        predictions.append(predict(model, tokenizer, sentence))
        print(
            f"Inference progress ({model_name}, {inference_function}): {round(100 * (i + 1) / n_sentences, 1)}%"
        )

    # Store the predictions in a private dataset
    utils.upload_predictions(
        predictions_repo_id,
        predictions,
        model_name,
        commit_id,
        inference_function,
    )

    print("Inference completed!")

except Exception as e:
    # Top-level boundary of the script: report the failure and record it in
    # the queue.
    print(f"An error occurred during inference of {model_name}: {e}")
    utils.update_model_queue(
        repo_id=predictions_repo_id,
        model_name=model_name,
        commit_id=commit_id,
        inference_function=inference_function,
        status="failed (online)",
    )
utils.py CHANGED
@@ -8,19 +8,49 @@ def current_seconds_time():
8
  return round(time.time())
9
 
10
 
11
- def upload_predictions(repo_id, predictions, model_name, inference_function):
 
 
 
 
 
12
  api = HfApi()
13
 
14
  timestamp = current_seconds_time()
15
- predictions_filename = (
16
- f"predictions_{timestamp}_{re.sub('/', '_', model_name)}.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  )
18
 
 
 
 
 
 
 
 
19
  predictions_object = {
20
  "model_name": model_name,
21
  "predictions": predictions,
22
- "timestamp": timestamp,
23
  "inference_function": inference_function,
 
 
24
  }
25
 
26
  with open(predictions_filename, "w") as f:
 
8
  return round(time.time())
9
 
10
 
11
def form_file_name(model_name, commit_id, inference_function):
    """Build the JSON filename for one (model, commit, inference function) triple.

    Every "/" in ``model_name`` is replaced by "_"; ``commit_id`` and
    ``inference_function`` are inserted unchanged.

    >>> form_file_name("org/model", "abc123", "fn")
    'predictions_org_model_abc123_fn.json'
    """
    # A literal single-character substitution needs no regular expression.
    flat_model_name = model_name.replace("/", "_")
    return f"predictions_{flat_model_name}_{commit_id}_{inference_function}.json"
13
+
14
+
15
def update_model_queue(repo_id, model_name, commit_id, inference_function, status):
    """Write the queue status of an evaluation request and upload it.

    A JSON object holding the model name, commit id, inference function,
    the current timestamp and ``status`` is written to a local file named
    by ``form_file_name`` (relative to the working directory) and uploaded
    under the same name to the dataset repository ``repo_id``.

    Args:
        repo_id: Id of the dataset repository that receives the file.
        model_name: Name of the model the request refers to.
        commit_id: Commit id of the model the request refers to.
        inference_function: Name of the inference function requested.
        status: One of "queued", "in_progress" or "failed (online)".

    Raises:
        ValueError: If ``status`` is not one of the accepted values.
    """
    allowed_statuses = ("queued", "in_progress", "failed (online)")
    # Explicit check instead of `assert`, which is removed under `python -O`.
    if status not in allowed_statuses:
        raise ValueError(
            f"Invalid status {status!r}; expected one of {allowed_statuses}."
        )

    api = HfApi()

    queue_filename = form_file_name(model_name, commit_id, inference_function)
    queue_entry = {
        "model_name": model_name,
        "commit_id": commit_id,
        "inference_function": inference_function,
        "last_updated_timestamp": current_seconds_time(),
        "status": status,
    }
    serialized_entry = json.dumps(queue_entry)

    # The local copy is still written, as before.
    with open(queue_filename, "w", encoding="utf-8") as f:
        f.write(serialized_entry)

    # The upload is requested with run_as_future=True. Passing the already
    # serialized bytes (rather than the file path) means a later rewrite of
    # the local file cannot change what this call uploads.
    # The result is deliberately not returned: the function keeps returning
    # None, as it did originally.
    api.upload_file(
        path_or_fileobj=serialized_entry.encode("utf-8"),
        path_in_repo=queue_filename,
        repo_id=repo_id,
        repo_type="dataset",
        run_as_future=True,
    )
40
 
41
+
42
+ def upload_predictions(repo_id, predictions, model_name, commit_id, inference_function):
43
+ api = HfApi()
44
+
45
+ timestamp = current_seconds_time()
46
+ predictions_filename = form_file_name(model_name, commit_id, inference_function)
47
+
48
  predictions_object = {
49
  "model_name": model_name,
50
  "predictions": predictions,
 
51
  "inference_function": inference_function,
52
+ "last_updated_timestamp": timestamp,
53
+ "status": "completed",
54
  }
55
 
56
  with open(predictions_filename, "w") as f: