Clémentine committed on
Commit
e295ac3
·
1 Parent(s): adb0416

added automatic update of the best LLM models

Browse files
Files changed (3) hide show
  1. app.py +3 -1
  2. requirements.txt +1 -1
  3. src/manage_collections.py +65 -0
app.py CHANGED
@@ -27,6 +27,7 @@ from src.display_models.utils import (
27
  styled_message,
28
  styled_warning,
29
  )
 
30
  from src.load_from_hub import get_all_requested_models, get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub
31
  from src.rate_limiting import user_submission_permission
32
 
@@ -88,6 +89,7 @@ snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="
88
  requested_models, users_to_submission_dates = get_all_requested_models(EVAL_REQUESTS_PATH)
89
 
90
  original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
 
91
  leaderboard_df = original_df.copy()
92
 
93
  models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
@@ -306,7 +308,7 @@ def filter_models(
306
  if show_deleted:
307
  filtered_df = df
308
  else: # Show only still on the hub models
309
- filtered_df = df[df[AutoEvalColumn.still_on_hub.name] is True]
310
 
311
  type_emoji = [t[0] for t in type_query]
312
  filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
 
27
  styled_message,
28
  styled_warning,
29
  )
30
+ from src.manage_collections import update_collections
31
  from src.load_from_hub import get_all_requested_models, get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub
32
  from src.rate_limiting import user_submission_permission
33
 
 
89
  requested_models, users_to_submission_dates = get_all_requested_models(EVAL_REQUESTS_PATH)
90
 
91
  original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
92
+ update_collections(original_df.copy())
93
  leaderboard_df = original_df.copy()
94
 
95
  models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
 
308
  if show_deleted:
309
  filtered_df = df
310
  else: # Show only still on the hub models
311
+ filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
312
 
313
  type_emoji = [t[0] for t in type_query]
314
  filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
requirements.txt CHANGED
@@ -25,7 +25,7 @@ gradio-client==0.5.0
25
  h11==0.14.0
26
  httpcore==0.17.0
27
  httpx==0.24.0
28
- huggingface-hub==0.16.4
29
  idna==3.4
30
  Jinja2==3.1.2
31
  jsonschema==4.17.3
 
25
  h11==0.14.0
26
  httpcore==0.17.0
27
  httpx==0.24.0
28
+ huggingface-hub==0.18.0
29
  idna==3.4
30
  Jinja2==3.1.2
31
  jsonschema==4.17.3
src/manage_collections.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from pandas import DataFrame
4
+ from huggingface_hub import get_collection, add_collection_item, delete_collection_item
5
+ from huggingface_hub.utils._errors import HfHubHTTPError
6
+
7
+ from src.display_models.model_metadata_type import ModelType
8
+ from src.display_models.utils import AutoEvalColumn
9
+
10
# Token passed to every Hub collection call below; None when the H4_TOKEN
# environment variable is not set.
H4_TOKEN = os.environ.get("H4_TOKEN", None)

# Slug of the Hub collection that update_collections() keeps in sync.
path_to_collection = "HuggingFaceH4/current-best-models-of-the-open-llm-leaderboard-652d64cf619fc62beef5c2a3"

# Size buckets: label (also used verbatim in the collection item note) -> range
# of accepted parameter counts. Only the "1B" bucket includes its upper bound;
# every other bucket is open on both sides.
# NOTE(review): values are presumably in billions of parameters -- confirm
# against the column named by AutoEvalColumn.params.
intervals = {
    "1B": pd.Interval(0, 1.5, closed="right"),
    "3B": pd.Interval(2.5, 3.5, closed="neither"),
    "7B": pd.Interval(6, 8, closed="neither"),
    "13B": pd.Interval(10, 14, closed="neither"),
    "30B": pd.Interval(25, 35, closed="neither"),
    "60B": pd.Interval(55, 65, closed="neither"),
}
21
+
22
def update_collections(df: DataFrame):
    """Synchronise the Hub collection with the current best leaderboard models.

    For every member of ``ModelType`` whose ``value.name`` is non-empty, and for
    every size bucket in ``intervals``, the matching rows of ``df`` are ranked by
    the average-score column (highest first). Candidates are then offered to the
    collection in that order until one ``add_collection_item`` call succeeds.
    Finally, every item in the collection that was not selected during this run
    is deleted.

    Args:
        df: Leaderboard dataframe. It must contain the columns named by
            ``AutoEvalColumn.params``, ``AutoEvalColumn.model_type_symbol``,
            ``AutoEvalColumn.average`` and ``AutoEvalColumn.dummy``.

    Raises:
        Nothing is raised for a failed ``add_collection_item`` call
        (``HfHubHTTPError`` is caught and the next candidate is tried). Errors
        from ``get_collection`` and ``delete_collection_item`` propagate.
    """
    # Non-numeric parameter counts become NaN, which falls in no interval.
    params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")

    # The size masks only depend on the parameter counts, so build them once
    # instead of once per (type, size) pair.
    size_masks = {
        size: params_column.apply(lambda n_params, bounds=bounds: n_params in bounds)
        for size, bounds in intervals.items()
    }

    cur_best_models = set()

    for model_type in ModelType:
        if model_type.value.name == "":
            continue

        # The type filter does not depend on the size bucket: compute it once.
        # NOTE(review): this iterates over the symbol itself, so each element of
        # the symbol is matched separately -- confirm this is intended.
        type_emoji = [t[0] for t in model_type.value.symbol]
        type_df = df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]

        for size, size_mask in size_masks.items():
            # The mask is indexed like `df`; .loc aligns it on the subset's index.
            candidates = type_df.loc[size_mask]
            ranked = candidates.sort_values(AutoEvalColumn.average.name, ascending=False)
            best_models = list(ranked[AutoEvalColumn.dummy.name])

            # Try candidates from best to worst and keep the first one the Hub accepts.
            # We could use get_collection to grab the id of the last item and then
            # reposition it with update_collection, but that is costly.
            # We could also drop exists_ok to refresh the note, e.g. to include the
            # date the model first appeared.
            for model in best_models:
                try:
                    add_collection_item(
                        path_to_collection,
                        item_id=model,
                        item_type="model",
                        exists_ok=True,
                        note=f"Best {model_type.to_str(' ')} model of {size} on the leaderboard today!",
                        token=H4_TOKEN,
                    )
                except HfHubHTTPError:
                    # Best effort: the Hub rejected this candidate, fall back to the next one.
                    continue
                cur_best_models.add(model)
                break

    # Drop every item that is no longer the best of its (type, size) category.
    # NOTE(review): if every add call above failed, this removes all items.
    collection = get_collection(path_to_collection, token=H4_TOKEN)
    for item in collection.items:
        if item.item_id not in cur_best_models:
            delete_collection_item(
                collection_slug=path_to_collection,
                item_object_id=item.item_object_id,
                token=H4_TOKEN,
            )
65
+