Clémentine committed on
Commit
0c7ef71
·
1 Parent(s): 9d02a6b
app.py CHANGED
@@ -27,7 +27,7 @@ from src.display.utils import (
27
  WeightType,
28
  Precision
29
  )
30
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
31
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
32
  from src.submission.submit import add_new_eval
33
  from src.tools.collections import update_collections
@@ -43,33 +43,52 @@ enable_space_ci()
43
  def restart_space():
44
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
45
 
46
- try:
47
- print(EVAL_REQUESTS_PATH)
48
- snapshot_download(
49
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
50
- )
51
- except Exception:
52
- restart_space()
53
- try:
54
- print(EVAL_RESULTS_PATH)
55
- snapshot_download(
56
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  )
58
- except Exception:
59
- restart_space()
60
 
 
61
 
62
- raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
63
- update_collections(original_df.copy())
64
- leaderboard_df = original_df.copy()
 
 
65
 
66
- plot_df = create_plot_df(create_scores_df(raw_data))
67
 
68
- (
69
- finished_eval_queue_df,
70
- running_eval_queue_df,
71
- pending_eval_queue_df,
72
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
73
 
74
 
75
  # Searching and filtering
 
27
  WeightType,
28
  Precision
29
  )
30
+ from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
31
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
32
  from src.submission.submit import add_new_eval
33
  from src.tools.collections import update_collections
 
43
  def restart_space():
44
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
45
 
46
+
47
def init_space():
    """Mirror the leaderboard datasets locally and build the dataframes the app displays.

    Downloads the requests, dynamic-info and results dataset repos into their
    local folders, then builds the leaderboard dataframe, the plot dataframe
    and the three evaluation-queue dataframes.

    Returns:
        A 6-tuple ``(leaderboard_df, original_df, plot_df, finished_eval_queue_df,
        running_eval_queue_df, pending_eval_queue_df)``.
    """
    # Same download recipe for each dataset repo; only the repo id and the
    # target folder differ. Order matches the original: requests, dynamic info, results.
    download_targets = (
        (QUEUE_REPO, EVAL_REQUESTS_PATH),
        (DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH),
        (RESULTS_REPO, EVAL_RESULTS_PATH),
    )
    for repo_id, local_dir in download_targets:
        try:
            print(local_dir)
            snapshot_download(
                repo_id=repo_id, local_dir=local_dir, repo_type="dataset", tqdm_class=None, etag_timeout=30
            )
        except Exception:
            # Any download failure triggers a Space restart request.
            restart_space()

    raw_data, original_df = get_leaderboard_df(
        results_path=EVAL_RESULTS_PATH,
        requests_path=EVAL_REQUESTS_PATH,
        dynamic_path=DYNAMIC_INFO_FILE_PATH,
        cols=COLS,
        benchmark_cols=BENCHMARK_COLS,
    )
    # Collections get their own copy so they cannot mutate the frame kept for display.
    update_collections(original_df.copy())
    leaderboard_df = original_df.copy()

    scores_df = create_scores_df(raw_data)
    plot_df = create_plot_df(scores_df)

    queue_frames = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
    finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = queue_frames

    return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
90
 
91
+ leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
 
 
 
 
92
 
93
 
94
  # Searching and filtering
src/envs.py CHANGED
@@ -7,6 +7,7 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
7
 
8
  REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
9
  QUEUE_REPO = "open-llm-leaderboard/requests"
 
10
  RESULTS_REPO = "open-llm-leaderboard/results"
11
 
12
  PRIVATE_QUEUE_REPO = "open-llm-leaderboard/private-requests"
@@ -18,6 +19,8 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
18
 
19
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
20
  EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
 
 
21
 
22
  EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
23
  EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
 
7
 
8
  REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
9
  QUEUE_REPO = "open-llm-leaderboard/requests"
10
+ DYNAMIC_INFO_REPO = "open-llm-leaderboard/dynamic_model_information"
11
  RESULTS_REPO = "open-llm-leaderboard/results"
12
 
13
  PRIVATE_QUEUE_REPO = "open-llm-leaderboard/private-requests"
 
19
 
20
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
21
  EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
22
+ DYNAMIC_INFO_PATH = os.path.join(CACHE_PATH, "dynamic-info")
23
+ DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
24
 
25
  EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
26
  EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
src/leaderboard/read_evals.py CHANGED
@@ -11,7 +11,6 @@ from huggingface_hub import ModelCard
11
 
12
  from src.display.formatting import make_clickable_model
13
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
14
- from src.submission.check_validity import is_model_on_hub, check_model_card
15
 
16
 
17
  @dataclass
@@ -34,6 +33,7 @@ class EvalResult:
34
  still_on_hub: bool = False
35
  is_merge: bool = False
36
  flagged: bool = False
 
37
 
38
  @classmethod
39
  def init_from_json_file(self, json_filepath):
@@ -42,13 +42,13 @@ class EvalResult:
42
  data = json.load(fp)
43
 
44
  # We manage the legacy config format
45
- config = data.get("config", data.get("config_general", None))
46
 
47
  # Precision
48
  precision = Precision.from_str(config.get("model_dtype"))
49
 
50
  # Get model and org
51
- org_and_model = config.get("model_name", config.get("model_args", None))
52
  org_and_model = org_and_model.split("/", 1)
53
 
54
  if len(org_and_model) == 1:
@@ -61,37 +61,6 @@ class EvalResult:
61
  result_key = f"{org}_{model}_{precision.value.name}"
62
  full_model = "/".join(org_and_model)
63
 
64
- still_on_hub, error, model_config = is_model_on_hub(
65
- full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
66
- )
67
- architecture = "?"
68
- if model_config is not None:
69
- architectures = getattr(model_config, "architectures", None)
70
- if architectures:
71
- architecture = ";".join(architectures)
72
-
73
- # If the model doesn't have a model card or a license, we consider it's deleted
74
- if still_on_hub:
75
- try:
76
- if check_model_card(full_model)[0] is False:
77
- still_on_hub = False
78
- except Exception:
79
- still_on_hub = False
80
-
81
- # Check if the model is a merge
82
- is_merge_from_metadata = False
83
- flagged = False
84
- if still_on_hub:
85
- model_card = ModelCard.load(full_model)
86
-
87
- if model_card.data.tags:
88
- is_merge_from_metadata = "merge" in model_card.data.tags
89
- merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
90
- # If the model is a merge but not saying it in the metadata, we flag it
91
- is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
92
- flagged = is_merge_from_model_card and not is_merge_from_metadata
93
-
94
-
95
  # Extract results available in this file (some results are split in several files)
96
  results = {}
97
  for task in Tasks:
@@ -128,10 +97,6 @@ class EvalResult:
128
  results=results,
129
  precision=precision,
130
  revision= config.get("model_sha", ""),
131
- still_on_hub=still_on_hub,
132
- architecture=architecture,
133
- is_merge=is_merge_from_metadata,
134
- flagged=flagged,
135
  )
136
 
137
  def update_with_request_file(self, requests_path):
@@ -143,13 +108,21 @@ class EvalResult:
143
  request = json.load(f)
144
  self.model_type = ModelType.from_str(request.get("model_type", ""))
145
  self.weight_type = WeightType[request.get("weight_type", "Original")]
146
- self.license = request.get("license", "?")
147
- self.likes = request.get("likes", 0)
148
  self.num_params = request.get("params", 0)
149
  self.date = request.get("submitted_time", "")
 
150
  except Exception:
151
  print(f"Could not find request file for {self.org}/{self.model}")
152
 
 
 
 
 
 
 
 
 
 
153
  def to_dict(self):
154
  """Converts the Eval Result to a dict compatible with our dataframe display"""
155
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
@@ -158,7 +131,7 @@ class EvalResult:
158
  AutoEvalColumn.precision.name: self.precision.value.name,
159
  AutoEvalColumn.model_type.name: self.model_type.value.name,
160
  AutoEvalColumn.merged.name: self.is_merge,
161
- AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol, # + "🥦" if self.is_merge,
162
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
163
  AutoEvalColumn.architecture.name: self.architecture,
164
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
@@ -170,7 +143,6 @@ class EvalResult:
170
  AutoEvalColumn.params.name: self.num_params,
171
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
172
  AutoEvalColumn.flagged.name: self.flagged
173
-
174
  }
175
 
176
  for task in Tasks:
@@ -201,7 +173,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
201
  return request_file
202
 
203
 
204
- def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
205
  """From the path of the results folder root, extract all needed info for results"""
206
  model_result_filepaths = []
207
 
@@ -219,11 +191,15 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
219
  for file in files:
220
  model_result_filepaths.append(os.path.join(root, file))
221
 
 
 
 
222
  eval_results = {}
223
  for model_result_filepath in model_result_filepaths:
224
  # Creation of result
225
  eval_result = EvalResult.init_from_json_file(model_result_filepath)
226
  eval_result.update_with_request_file(requests_path)
 
227
 
228
  # Store results of same eval together
229
  eval_name = eval_result.eval_name
 
11
 
12
  from src.display.formatting import make_clickable_model
13
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 
14
 
15
 
16
  @dataclass
 
33
  still_on_hub: bool = False
34
  is_merge: bool = False
35
  flagged: bool = False
36
+ tags: list = None
37
 
38
  @classmethod
39
  def init_from_json_file(self, json_filepath):
 
42
  data = json.load(fp)
43
 
44
  # We manage the legacy config format
45
+ config = data.get("config_general")
46
 
47
  # Precision
48
  precision = Precision.from_str(config.get("model_dtype"))
49
 
50
  # Get model and org
51
+ org_and_model = config.get("model_name")
52
  org_and_model = org_and_model.split("/", 1)
53
 
54
  if len(org_and_model) == 1:
 
61
  result_key = f"{org}_{model}_{precision.value.name}"
62
  full_model = "/".join(org_and_model)
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # Extract results available in this file (some results are split in several files)
65
  results = {}
66
  for task in Tasks:
 
97
  results=results,
98
  precision=precision,
99
  revision= config.get("model_sha", ""),
 
 
 
 
100
  )
101
 
102
  def update_with_request_file(self, requests_path):
 
108
  request = json.load(f)
109
  self.model_type = ModelType.from_str(request.get("model_type", ""))
110
  self.weight_type = WeightType[request.get("weight_type", "Original")]
 
 
111
  self.num_params = request.get("params", 0)
112
  self.date = request.get("submitted_time", "")
113
+ self.architecture = request["architectures"]
114
  except Exception:
115
  print(f"Could not find request file for {self.org}/{self.model}")
116
 
117
def update_with_dynamic_file_dict(self, file_dict):
    """Copy one model's entry from the dynamic-info file onto this result.

    Args:
        file_dict: dict for this model. The keys read are ``license``,
            ``likes``, ``still_on_hub`` and ``tags``.

    Every key is optional. A missing ``license`` becomes ``"?"``, missing
    ``likes`` becomes ``0``, missing ``still_on_hub`` becomes ``False`` and a
    missing or null ``tags`` becomes an empty list, so an incomplete entry
    no longer raises ``KeyError``/``TypeError``.
    """
    # `or []` also covers an explicit null stored in the JSON file.
    tags = file_dict.get("tags") or []

    self.license = file_dict.get("license", "?")
    self.likes = file_dict.get("likes", 0)
    self.still_on_hub = file_dict.get("still_on_hub", False)
    # Any tag containing "flagged" (e.g. "flagged:undisclosed_merge") marks the model.
    self.flagged = any("flagged" in tag for tag in tags)
    self.is_merge = "merge" in tags
    self.tags = tags
124
+
125
+
126
  def to_dict(self):
127
  """Converts the Eval Result to a dict compatible with our dataframe display"""
128
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
 
131
  AutoEvalColumn.precision.name: self.precision.value.name,
132
  AutoEvalColumn.model_type.name: self.model_type.value.name,
133
  AutoEvalColumn.merged.name: self.is_merge,
134
+ AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
135
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
136
  AutoEvalColumn.architecture.name: self.architecture,
137
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
 
143
  AutoEvalColumn.params.name: self.num_params,
144
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
145
  AutoEvalColumn.flagged.name: self.flagged
 
146
  }
147
 
148
  for task in Tasks:
 
173
  return request_file
174
 
175
 
176
+ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
177
  """From the path of the results folder root, extract all needed info for results"""
178
  model_result_filepaths = []
179
 
 
191
  for file in files:
192
  model_result_filepaths.append(os.path.join(root, file))
193
 
194
+ with open(dynamic_path) as f:
195
+ dynamic_data = json.load(f)
196
+
197
  eval_results = {}
198
  for model_result_filepath in model_result_filepaths:
199
  # Creation of result
200
  eval_result = EvalResult.init_from_json_file(model_result_filepath)
201
  eval_result.update_with_request_file(requests_path)
202
+ eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
203
 
204
  # Store results of same eval together
205
  eval_name = eval_result.eval_name
src/populate.py CHANGED
@@ -9,8 +9,8 @@ from src.leaderboard.filter_models import filter_models
9
  from src.leaderboard.read_evals import get_raw_eval_results
10
 
11
 
12
- def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
13
- raw_data = get_raw_eval_results(results_path, requests_path)
14
  all_data_json = [v.to_dict() for v in raw_data]
15
  all_data_json.append(baseline_row)
16
  filter_models(all_data_json)
 
9
  from src.leaderboard.read_evals import get_raw_eval_results
10
 
11
 
12
+ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
13
+ raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
14
  all_data_json = [v.to_dict() for v in raw_data]
15
  all_data_json.append(baseline_row)
16
  filter_models(all_data_json)
{scripts → src/scripts}/create_request_file.py RENAMED
@@ -1,36 +1,21 @@
1
  import json
2
  import os
3
  import pprint
4
- import re
5
  from datetime import datetime, timezone
6
 
7
  import click
8
  from colorama import Fore
9
  from huggingface_hub import HfApi, snapshot_download
10
 
 
 
 
11
  EVAL_REQUESTS_PATH = "eval-queue"
12
  QUEUE_REPO = "open-llm-leaderboard/requests"
13
 
14
  precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
15
- model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
16
- weight_types = ("Original", "Delta", "Adapter")
17
-
18
-
19
- def get_model_size(model_info, precision: str):
20
- size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
21
- try:
22
- model_size = round(model_info.safetensors["total"] / 1e9, 3)
23
- except (AttributeError, TypeError):
24
- try:
25
- size_match = re.search(size_pattern, model_info.modelId.lower())
26
- model_size = size_match.group(0)
27
- model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
28
- except AttributeError:
29
- return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
30
-
31
- size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
32
- model_size = size_factor * model_size
33
- return model_size
34
 
35
 
36
  def main():
 
1
  import json
2
  import os
3
  import pprint
 
4
  from datetime import datetime, timezone
5
 
6
  import click
7
  from colorama import Fore
8
  from huggingface_hub import HfApi, snapshot_download
9
 
10
+ from src.submission.check_validity import get_model_size
11
+ from src.display.utils import ModelType, WeightType
12
+
13
  EVAL_REQUESTS_PATH = "eval-queue"
14
  QUEUE_REPO = "open-llm-leaderboard/requests"
15
 
16
  precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
17
+ model_types = [e.name for e in ModelType]
18
+ weight_types = [e.name for e in WeightType]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  def main():
src/scripts/update_all_request_files.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import HfApi, ModelFilter, snapshot_download
2
+ from huggingface_hub import ModelCard
3
+
4
+ import json
5
+ import os
6
+ import time
7
+ import shutil
8
+ from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_size
9
+ from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, API
10
+
11
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
12
+
13
+ TMP_FOLDER = "tmp_requests"
14
+ snapshot_download(
15
+ repo_id=DYNAMIC_INFO_REPO, local_dir=TMP_FOLDER, repo_type="dataset", tqdm_class=None, etag_timeout=30
16
+ )
17
+
18
+ # Get models
19
+ start = time.time()
20
+
21
+ models = list(API.list_models(
22
+ filter=ModelFilter(task="text-generation"),
23
+ full=False,
24
+ cardData=True,
25
+ fetch_config=True,
26
+ ))
27
+
28
+ print(f"Downloaded list of models in {time.time() - start:.2f} seconds")
29
+
30
def update_models(file_path, models):
    """Refresh the hub-derived fields of every model stored in the dynamic-info file.

    For each model id in the JSON file that is also present in ``models``,
    this updates ``likes``, ``license``, ``still_on_hub`` and ``tags``, then
    writes the file back in place.

    Args:
        file_path: path of the JSON file mapping model id -> info dict.
        models: either a mapping of model id -> model info, or an iterable of
            model-info objects (e.g. the list built from ``API.list_models``).
            Objects are indexed by their ``id`` attribute, falling back to
            ``modelId`` (assumed to be the older attribute name; verify
            against the installed huggingface_hub version).

    Returns:
        The list of model ids that were refreshed, in file order.
    """
    # The caller passes a plain list, so build an id -> info index once
    # instead of using string lookups on the list.
    if isinstance(models, dict):
        models_by_id = models
    else:
        models_by_id = {
            (getattr(model, "id", None) or getattr(model, "modelId", None)): model for model in models
        }

    with open(file_path, "r") as f:
        model_infos = json.load(f)

    updated_ids = []
    for model_id, data in model_infos.items():
        model_cfg = models_by_id.get(model_id)
        if model_cfg is None:
            continue

        data['likes'] = model_cfg.likes
        # NOTE: `params` is deliberately not refreshed here; the get_model_size
        # call was disabled in the original code.
        data['license'] = model_cfg.card_data.license if model_cfg.card_data is not None else ""

        # Is the model still on the hub
        still_on_hub, _, _ = is_model_on_hub(
            model_name=model_id, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False
        )
        # If the model doesn't have a model card or a license, we consider it's deleted
        if still_on_hub:
            try:
                if check_model_card(model_id)[0] is False:
                    still_on_hub = False
            except Exception:
                still_on_hub = False
        data['still_on_hub'] = still_on_hub

        if still_on_hub:
            model_card = ModelCard.load(model_id)
            # A card without a `tags` field yields None; treat it as "no tags".
            card_tags = model_card.data.tags or []

            tags = []
            is_merge_from_metadata = "merge" in card_tags
            merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
            # If the model is a merge but not saying it in the metadata, we flag it
            is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
            if is_merge_from_model_card:
                tags.append("merge")
                if not is_merge_from_metadata:
                    tags.append("flagged:undisclosed_merge")
            if "moe" in card_tags:
                tags.append("moe")
            data["tags"] = tags
        else:
            # Keep previously stored tags for models that left the hub, but
            # make sure the key exists for code that reads it.
            data.setdefault("tags", [])

        updated_ids.append(model_id)

    with open(file_path, 'w') as f:
        json.dump(model_infos, f, indent=2)

    return updated_ids
82
+
83
+ start = time.time()
84
+
85
+ updated_ids = update_models(DYNAMIC_INFO_FILE_PATH, models)
86
+
87
+ print(f"updated in {time.time() - start:.2f} seconds, updated ids: {len(updated_ids)}")
88
+
89
+ API.upload_file(
90
+ path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
91
+ path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
92
+ repo_id=DYNAMIC_INFO_REPO,
93
+ repo_type="dataset",
94
+ commit_message=f"Daily request file update.",
95
+ )
96
+
97
+ shutil.rmtree(TMP_FOLDER)
src/submission/check_validity.py CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime, timedelta, timezone
6
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
9
- from huggingface_hub.hf_api import ModelInfo
10
  from transformers import AutoConfig, AutoTokenizer
11
 
12
  from src.envs import HAS_HIGHER_RATE_LIMIT
@@ -36,7 +36,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
36
  return True, ""
37
 
38
 
39
- def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
40
  try:
41
  config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
42
  if test_tokenizer:
@@ -65,17 +65,23 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
65
 
66
  def get_model_size(model_info: ModelInfo, precision: str):
67
  size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
 
68
  try:
69
- model_size = round(model_info.safetensors["total"] / 1e9, 3)
70
- except (AttributeError, TypeError ):
 
 
 
 
 
71
  try:
72
- size_match = re.search(size_pattern, model_info.modelId.lower())
73
  model_size = size_match.group(0)
74
  model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
75
- except AttributeError:
76
  return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
77
 
78
- size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
79
  model_size = size_factor * model_size
80
  return model_size
81
 
 
6
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
9
+ from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata
10
  from transformers import AutoConfig, AutoTokenizer
11
 
12
  from src.envs import HAS_HIGHER_RATE_LIMIT
 
36
  return True, ""
37
 
38
 
39
+ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
40
  try:
41
  config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
42
  if test_tokenizer:
 
65
 
66
def get_model_size(model_info: "ModelInfo", precision: str):
    """Return the model size in billions of parameters, or 0 when it is unknown.

    The size comes from the repo's safetensors metadata when it can be
    fetched. Otherwise it is parsed from a ``<number>b`` / ``<number>m``
    token in the lower-cased repo id. The result is multiplied by 8 when
    ``precision`` is ``"GPTQ"`` or the repo id contains ``"gptq"``.

    Args:
        model_info: object exposing the repo id as ``.id``.
        precision: precision label of the submission.
    """
    try:
        metadata = get_safetensors_metadata(model_info.id)
    except Exception as err:
        # Best effort: report the failure and fall back to the repo name.
        print(err)
        metadata = None

    if metadata is None:
        try:
            # `.group` raises AttributeError when the id carries no size token.
            token = re.search(r"(\d+\.)?\d+(b|m)", model_info.id.lower()).group(0)
        except AttributeError:
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
        number, unit = float(token[:-1]), token[-1]
        # An "m" suffix means millions, converted here to billions.
        billions = round(number if unit == "b" else number / 1e3, 3)
    else:
        billions = round(sum(metadata.parameter_count.values()) / 1e9, 3)

    if precision == "GPTQ" or "gptq" in model_info.id.lower():
        return 8 * billions
    return 1 * billions
87
 
src/submission/submit.py CHANGED
@@ -2,8 +2,10 @@ import json
2
  import os
3
  from datetime import datetime, timezone
4
 
 
 
5
  from src.display.formatting import styled_error, styled_message, styled_warning
6
- from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
7
  from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
8
  from src.submission.check_validity import (
9
  already_submitted_models,
@@ -65,9 +67,15 @@ def add_new_eval(
65
  return styled_error(f'Base model "{base_model}" {error}')
66
 
67
  if not weight_type == "Adapter":
68
- model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
69
  if not model_on_hub:
70
  return styled_error(f'Model "{model}" {error}')
 
 
 
 
 
 
71
 
72
  # Is the model info correctly filled?
73
  try:
@@ -86,6 +94,22 @@ def add_new_eval(
86
  modelcard_OK, error_msg = check_model_card(model)
87
  if not modelcard_OK:
88
  return styled_error(error_msg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  # Seems good, creating the eval
91
  print("Adding new eval")
@@ -96,13 +120,21 @@ def add_new_eval(
96
  "revision": revision,
97
  "private": private,
98
  "precision": precision,
 
 
99
  "weight_type": weight_type,
100
  "status": "PENDING",
101
  "submitted_time": current_time,
102
  "model_type": model_type,
 
 
 
 
 
103
  "likes": model_info.likes,
104
- "params": model_size,
105
  "license": license,
 
 
106
  }
107
 
108
  # Check for duplicate submission
@@ -126,6 +158,23 @@ def add_new_eval(
126
  commit_message=f"Add {model} to eval queue",
127
  )
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  # Remove the local file
130
  os.remove(out_path)
131
 
 
2
  import os
3
  from datetime import datetime, timezone
4
 
5
+ from huggingface_hub import ModelCard
6
+
7
  from src.display.formatting import styled_error, styled_message, styled_warning
8
+ from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_REPO, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
9
  from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
10
  from src.submission.check_validity import (
11
  already_submitted_models,
 
67
  return styled_error(f'Base model "{base_model}" {error}')
68
 
69
  if not weight_type == "Adapter":
70
+ model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
71
  if not model_on_hub:
72
  return styled_error(f'Model "{model}" {error}')
73
+ architecture = "?"
74
+ if model_config is not None:
75
+ architectures = getattr(model_config, "architectures", None)
76
+ if architectures:
77
+ architecture = ";".join(architectures)
78
+
79
 
80
  # Is the model info correctly filled?
81
  try:
 
94
  modelcard_OK, error_msg = check_model_card(model)
95
  if not modelcard_OK:
96
  return styled_error(error_msg)
97
+
98
+ # Storing the model tags
99
+ tags = []
100
+
101
+ model_card = ModelCard.load(model)
102
+ is_merge_from_metadata = "merge" in model_card.data.tags if model_card.data.tags else False
103
+ merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
104
+ # If the model is a merge but not saying it in the metadata, we flag it
105
+ is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
106
+ if is_merge_from_model_card:
107
+ tags.append("merge")
108
+ if not is_merge_from_metadata:
109
+ tags.append("flagged:undisclosed_merge")
110
+ if "moe" in model_card.data.tags:
111
+ tags.append("moe")
112
+
113
 
114
  # Seems good, creating the eval
115
  print("Adding new eval")
 
120
  "revision": revision,
121
  "private": private,
122
  "precision": precision,
123
+ "params": model_size,
124
+ "architectures": architecture,
125
  "weight_type": weight_type,
126
  "status": "PENDING",
127
  "submitted_time": current_time,
128
  "model_type": model_type,
129
+ "job_id": -1,
130
+ "job_start_time": None,
131
+ }
132
+
133
+ supplementary_info = {
134
  "likes": model_info.likes,
 
135
  "license": license,
136
+ "still_on_hub": True,
137
+ "tags": tags,
138
  }
139
 
140
  # Check for duplicate submission
 
158
  commit_message=f"Add {model} to eval queue",
159
  )
160
 
161
+ with open(DYNAMIC_INFO_FILE_PATH) as f:
162
+ all_supplementary_info = json.load(f)
163
+
164
+ all_supplementary_info[model] = supplementary_info
165
+ with open(DYNAMIC_INFO_FILE_PATH, "w") as f:
166
+ json.dump(all_supplementary_info, f, indent=2)
167
+
168
+ API.upload_file(
169
+ path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
170
+ path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
171
+ repo_id=DYNAMIC_INFO_REPO,
172
+ repo_type="dataset",
173
+ commit_message=f"Add {model} to dynamic info queue",
174
+ )
175
+
176
+
177
+
178
  # Remove the local file
179
  os.remove(out_path)
180