idolezal committed on
Commit
b35e51f
1 Parent(s): cc2dcfa

Changed submission ID and added more information from the submission

Browse files
Files changed (2) hide show
  1. app.py +34 -25
  2. server.py +62 -31
app.py CHANGED
@@ -19,29 +19,37 @@ from server import LeaderboardServer
19
  leaderboard_server = LeaderboardServer()
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
22
  def on_submit_pressed():
23
  return gr.update(value='Processing submission...', interactive=False)
24
 
25
 
26
- def validate_submission_inputs(team_name, submission_id, link_to_model, submission_file):
27
- if not team_name or not submission_id or not link_to_model or not submission_file:
28
  raise ValueError('Please fill in all fields')
29
- if not os.path.exists(submission_file):
30
  raise ValueError('File does not exist')
31
 
32
-
33
- def process_submission(team_name, submission_id, description, link_to_model, submission_file):
34
  try:
35
- validate_submission_inputs(team_name, submission_id, link_to_model, submission_file)
36
- metadata = {
37
- "team_name": team_name,
38
- "submission_id": submission_id,
39
- "description": description,
40
- "link_to_model": link_to_model,
41
- }
42
  gr.Info('Submission valid, running local tournament...')
43
 
44
- leaderboard_server.prepare_model_for_submission(submission_file, metadata)
45
  except ValueError as err:
46
  gr.Warning(str(err))
47
  return (
@@ -226,15 +234,22 @@ with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css) as main:
226
  with gr.Tab('Submission'):
227
  with gr.Column():
228
  gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)
 
 
 
229
  with gr.Row():
230
- submission_team_name_tb = gr.Textbox(label='Team Name')
231
- submission_id_tb = gr.Textbox(label='Submission ID')
 
 
232
 
233
  with gr.Row():
234
- description_tb = gr.Textbox(label='Description', type='text')
235
- link_to_model_tb = gr.Textbox(label='Link to model', type='text')
 
236
 
237
- submission_file_path = gr.File(label='Upload your results', type='filepath')
 
238
  pre_submission_btn = gr.Button(value='Pre-submit model', interactive=True)
239
 
240
  submit_prompt = gr.Markdown(
@@ -263,13 +278,7 @@ with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css) as main:
263
  outputs=[pre_submission_btn],
264
  ).then(
265
  fn=process_submission,
266
- inputs=[
267
- submission_team_name_tb,
268
- submission_id_tb,
269
- description_tb,
270
- link_to_model_tb,
271
- submission_file_path,
272
- ],
273
  outputs=[
274
  pre_submission_btn,
275
  submit_prompt,
 
19
  leaderboard_server = LeaderboardServer()
20
 
21
 
22
+ SUBMISSION_INPUTS = dict.fromkeys((
23
+ "team_name",
24
+ "model_name",
25
+ "model_type",
26
+ "parameters",
27
+ "precision",
28
+ "description",
29
+ "link_to_model",
30
+ "submission_file",
31
+ )).keys()
32
+
33
  def on_submit_pressed():
34
  return gr.update(value='Processing submission...', interactive=False)
35
 
36
 
37
def validate_submission_inputs(**kwargs):
    """Reject a submission whose form is incomplete or whose upload is gone.

    Keyword arguments are the form values keyed by the names listed in
    ``SUBMISSION_INPUTS``.

    Raises:
        ValueError: if a required field is missing or empty, or if the
            uploaded file path does not exist on disk.
    """
    # Checking only for the *presence* of keys is not enough: the caller
    # zips SUBMISSION_INPUTS with the widget values, so every key is always
    # present even when the user left the field blank. Test the values.
    # An empty "description" is tolerated.
    # NOTE(review): confirm whether the description should be mandatory.
    required = [name for name in SUBMISSION_INPUTS if name != "description"]
    if any(not kwargs.get(name) for name in required):
        raise ValueError('Please fill in all fields')

    # The emptiness check above also guarantees the path is not None, which
    # os.path.exists() would answer with a TypeError rather than the
    # ValueError that the caller knows how to report.
    if not os.path.exists(kwargs["submission_file"]):
        raise ValueError('File does not exist')
42
 
43
+ def process_submission(*inputs):
 
44
  try:
45
+ inputs = dict(zip(SUBMISSION_INPUTS, inputs))
46
+ validate_submission_inputs(**inputs)
47
+ metadata = SUBMISSION_INPUTS - {"submission_file"}
48
+ metadata = {key: inputs[key] for key in metadata}
49
+
 
 
50
  gr.Info('Submission valid, running local tournament...')
51
 
52
+ leaderboard_server.prepare_model_for_submission(inputs["submission_file"], metadata)
53
  except ValueError as err:
54
  gr.Warning(str(err))
55
  return (
 
234
  with gr.Tab('Submission'):
235
  with gr.Column():
236
  gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)
237
+
238
+ submission_inputs = dict.fromkeys(SUBMISSION_INPUTS)
239
+
240
  with gr.Row():
241
+ submission_inputs["team_name"] = gr.Textbox(label='Team name')
242
+ submission_inputs["model_name"] = gr.Textbox(label='Model name')
243
+ submission_inputs["model_type"] = gr.Textbox(label='Model type')
244
+ submission_inputs["parameters"] = gr.Textbox(label='Parameters')
245
 
246
  with gr.Row():
247
+ submission_inputs["precision"] = gr.Textbox(label='Precision')
248
+ submission_inputs["description"] = gr.Textbox(label='Description', type='text')
249
+ submission_inputs["link_to_model"] = gr.Textbox(label='Link to model', type='text')
250
 
251
+ submission_inputs["submission_file"] = gr.File(label='Upload your results', type='filepath')
252
+
253
  pre_submission_btn = gr.Button(value='Pre-submit model', interactive=True)
254
 
255
  submit_prompt = gr.Markdown(
 
278
  outputs=[pre_submission_btn],
279
  ).then(
280
  fn=process_submission,
281
+ inputs=list(submission_inputs.values()),
 
 
 
 
 
 
282
  outputs=[
283
  pre_submission_btn,
284
  submit_prompt,
server.py CHANGED
@@ -2,6 +2,7 @@ import copy
2
  import glob
3
  import json
4
  import os
 
5
 
6
  import gradio as gr
7
  import pandas as pd
@@ -18,7 +19,6 @@ REPO = f"{ORG}/LLM_benchmark_data"
18
  HF_TOKEN = os.environ.get("HF_TOKEN")
19
  TASKS_METADATA_PATH = "./tasks_metadata.json"
20
 
21
-
22
  class LeaderboardServer:
23
  def __init__(self):
24
  self.server_address = REPO
@@ -29,7 +29,7 @@ class LeaderboardServer:
29
  token=HF_TOKEN,
30
  local_dir="./",
31
  )
32
- self.submisssion_id_to_file = {} # Map submission ids to file paths
33
  self.tasks_metadata = json.load(open(TASKS_METADATA_PATH))
34
  self.tasks_categories = {self.tasks_metadata[task]["category"] for task in self.tasks_metadata}
35
  self.submission_ids = set()
@@ -57,15 +57,15 @@ class LeaderboardServer:
57
 
58
  def fetch_existing_models(self):
59
  # Models data
60
- for submission in glob.glob(os.path.join(self.local_leaderboard, "data") + "/*.json"):
61
- data = json.load(open(submission))
62
  metadata = data.get('metadata')
63
  if metadata is None:
64
  continue
65
- submission_id = metadata["team_name"] + "_" + metadata["submission_id"]
66
  self.submission_ids.add(submission_id)
67
 
68
- self.submisssion_id_to_file[submission_id] = submission
69
 
70
  def get_leaderboard(self, tournament_results=None):
71
  results = tournament_results if tournament_results else self.tournament_results
@@ -74,24 +74,26 @@ class LeaderboardServer:
74
  return pd.DataFrame(columns=['No submissions yet'])
75
  else:
76
  processed_results = []
77
- for submission in results.keys():
78
- path = self.submisssion_id_to_file.get(submission)
79
  if path is None:
80
- if self.pre_submit and submission == self.pre_submit[1]:
81
  data = json.load(open(self.pre_submit[2]))
82
  else:
83
- raise gr.Error(f"Internal error: Submission [{submission}] not found")
84
  elif path:
85
  data = json.load(open(path))
86
  else:
87
- raise gr.Error(f"Submission [{submission}] not found")
88
- submission_id = data["metadata"]["team_name"] + "_" + data["metadata"]["submission_id"]
 
 
89
 
90
  local_results = {}
91
  for task in self.tasks_metadata.keys():
92
  local_results[task] = 0
93
- for model in results[submission].keys():
94
- if results[submission][model][task]:
95
  local_results[task] += 1
96
  for metric in VISIBLE_METRICS:
97
  metric_value = data['results'][task].get(metric)
@@ -100,7 +102,7 @@ class LeaderboardServer:
100
 
101
  local_results["submission_id"] = submission_id
102
 
103
- if self.pre_submit and submission == self.pre_submit[1]:
104
  processed_results.insert(0, local_results)
105
  else:
106
  processed_results.append(local_results)
@@ -120,42 +122,71 @@ class LeaderboardServer:
120
  )
121
  return dataframe
122
 
123
- def start_tournament(self, new_model_id, new_model_file):
124
  new_tournament = copy.deepcopy(self.tournament_results)
125
- new_tournament[new_model_id] = {}
126
- new_tournament[new_model_id][new_model_id] = {
127
  task: False for task in self.tasks_metadata.keys()
128
  }
129
 
130
- for model in self.submission_ids:
131
- res = check_significance(new_model_file, self.submisssion_id_to_file[model])
132
- res_inverse = check_significance(self.submisssion_id_to_file[model], new_model_file)
133
- new_tournament[new_model_id][model] = {
134
  task: data["significant"] for task, data in res.items()
135
  }
136
- new_tournament[model][new_model_id] = {
137
  task: data["significant"] for task, data in res_inverse.items()
138
  }
139
  return new_tournament
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def prepare_model_for_submission(self, file, metadata) -> None:
142
  with open(file, "r") as f:
143
  data = json.load(f)
 
144
  data["metadata"] = metadata
 
 
 
 
 
 
 
 
145
  with open(file, "w") as f:
146
  json.dump(data, f, separators=(',', ':')) # compact JSON
147
-
148
- model_id = metadata["team_name"] + "_" + metadata["submission_id"]
149
- tournament_results = self.start_tournament(model_id, file)
150
- self.pre_submit = tournament_results, model_id, file
151
 
152
  def save_pre_submit(self):
153
  if self.pre_submit:
154
- tournament_results, model_id, file = self.pre_submit
155
- filename = os.path.basename(file)
156
  api.upload_file(
157
  path_or_fileobj=file,
158
- path_in_repo=f"data/{model_id}_{filename}",
159
  repo_id=self.server_address,
160
  repo_type=self.repo_type,
161
  token=HF_TOKEN,
@@ -175,7 +206,7 @@ class LeaderboardServer:
175
  )
176
 
177
  def get_model_detail(self, submission_id):
178
- path = self.submisssion_id_to_file.get(submission_id)
179
  if path is None:
180
  raise gr.Error(f"Submission [{submission_id}] not found")
181
  data = json.load(open(path))
 
2
  import glob
3
  import json
4
  import os
5
+ import hashlib
6
 
7
  import gradio as gr
8
  import pandas as pd
 
19
  HF_TOKEN = os.environ.get("HF_TOKEN")
20
  TASKS_METADATA_PATH = "./tasks_metadata.json"
21
 
 
22
  class LeaderboardServer:
23
  def __init__(self):
24
  self.server_address = REPO
 
29
  token=HF_TOKEN,
30
  local_dir="./",
31
  )
32
+ self.submission_id_to_file = {} # Map submission ids to file paths
33
  self.tasks_metadata = json.load(open(TASKS_METADATA_PATH))
34
  self.tasks_categories = {self.tasks_metadata[task]["category"] for task in self.tasks_metadata}
35
  self.submission_ids = set()
 
57
 
58
  def fetch_existing_models(self):
59
  # Models data
60
+ for submission_file in glob.glob(os.path.join(self.local_leaderboard, "data") + "/*.json"):
61
+ data = json.load(open(submission_file))
62
  metadata = data.get('metadata')
63
  if metadata is None:
64
  continue
65
+ submission_id = metadata["submission_id"]
66
  self.submission_ids.add(submission_id)
67
 
68
+ self.submission_id_to_file[submission_id] = submission_file
69
 
70
  def get_leaderboard(self, tournament_results=None):
71
  results = tournament_results if tournament_results else self.tournament_results
 
74
  return pd.DataFrame(columns=['No submissions yet'])
75
  else:
76
  processed_results = []
77
+ for submission_id in results.keys():
78
+ path = self.submission_id_to_file.get(submission_id)
79
  if path is None:
80
+ if self.pre_submit and submission_id == self.pre_submit[1]:
81
  data = json.load(open(self.pre_submit[2]))
82
  else:
83
+ raise gr.Error(f"Internal error: Submission [{submission_id}] not found")
84
  elif path:
85
  data = json.load(open(path))
86
  else:
87
+ raise gr.Error(f"Submission [{submission_id}] not found")
88
+
89
+ if submission_id != data["metadata"]["submission_id"]:
90
+ raise gr.Error(f"Proper submission [{submission_id}] not found")
91
 
92
  local_results = {}
93
  for task in self.tasks_metadata.keys():
94
  local_results[task] = 0
95
+ for model in results[submission_id].keys():
96
+ if results[submission_id][model][task]:
97
  local_results[task] += 1
98
  for metric in VISIBLE_METRICS:
99
  metric_value = data['results'][task].get(metric)
 
102
 
103
  local_results["submission_id"] = submission_id
104
 
105
+ if self.pre_submit and submission_id == self.pre_submit[1]:
106
  processed_results.insert(0, local_results)
107
  else:
108
  processed_results.append(local_results)
 
122
  )
123
  return dataframe
124
 
125
def start_tournament(self, new_submission_id, new_model_file):
    """Return a copy of the tournament table extended with a new submission.

    ``self.tournament_results`` itself is left untouched; the work is done
    on a deep copy. The new submission gets a row of its own, and for every
    id in ``self.submission_ids`` the "significant" flag per task is stored
    in both directions, as reported by ``check_significance``.

    Args:
        new_submission_id: key under which the new submission is recorded.
        new_model_file: path of the new submission's file, passed to
            ``check_significance``.

    Returns:
        The extended copy of the tournament results.
    """
    tournament = copy.deepcopy(self.tournament_results)

    # The entry of the new submission against itself is False for every task.
    tournament[new_submission_id] = {
        new_submission_id: dict.fromkeys(self.tasks_metadata, False),
    }

    for opponent_id in self.submission_ids:
        opponent_file = self.submission_id_to_file[opponent_id]
        # presumably check_significance(a, b) tests whether a is
        # significantly better than b - TODO confirm against its definition
        forward = check_significance(new_model_file, opponent_file)
        backward = check_significance(opponent_file, new_model_file)

        tournament[new_submission_id][opponent_id] = {
            task: outcome["significant"] for task, outcome in forward.items()
        }
        tournament[opponent_id][new_submission_id] = {
            task: outcome["significant"] for task, outcome in backward.items()
        }

    return tournament
142
 
143
+ @staticmethod
144
def create_submission_id(metadata):
    """Build a short, file-name-safe submission ID from submission metadata.

    The ID joins, with underscores, the first 7 characters of the team name,
    the model name, and the SHA-256 hex digests of the predictions and of
    the results.

    Args:
        metadata: mapping with string values under ``team_name``,
            ``model_name``, ``model_predictions_sha256`` and
            ``model_results_sha256``.

    Returns:
        The submission ID string.

    Raises:
        KeyError: if one of the four keys is missing.
    """
    # Local import so this block does not depend on the file's import header.
    import re

    id_fields = (
        "team_name",
        "model_name",
        "model_predictions_sha256",
        "model_results_sha256",
    )

    parts = []
    for key in id_fields:
        # The length of the ID must be limited because it is used in a file name.
        part = metadata[key][:7]
        # Team and model names are typed by the user; replace anything that
        # is not a word character, dot or dash (notably path separators) so
        # the ID cannot change the directory the file is stored in.
        parts.append(re.sub(r"[^\w.-]", "-", part))

    return "_".join(parts)
153
+
154
+ @staticmethod
155
def get_sha256_hexdigest(obj):
    """Return the SHA-256 hex digest of a JSON-serialisable object.

    The object is first serialised to compact JSON with sorted keys and
    ASCII-only output, so the digest does not depend on dict key order.
    """
    canonical_json = json.dumps(
        obj,
        ensure_ascii=True,
        sort_keys=True,
        separators=(',', ':'),
    )
    return hashlib.sha256(canonical_json.encode()).hexdigest()
164
+
165
  def prepare_model_for_submission(self, file, metadata) -> None:
166
  with open(file, "r") as f:
167
  data = json.load(f)
168
+
169
  data["metadata"] = metadata
170
+
171
+ metadata["model_predictions_sha256"] = self.get_sha256_hexdigest(data["predictions"])
172
+ metadata["model_results_sha256"] = self.get_sha256_hexdigest(data["results"])
173
+
174
+ # Délka ID můsí být omezena, protože se používá v názvu souboru
175
+ submission_id = self.create_submission_id(metadata)
176
+ metadata["submission_id"] = submission_id
177
+
178
  with open(file, "w") as f:
179
  json.dump(data, f, separators=(',', ':')) # compact JSON
180
+
181
+ tournament_results = self.start_tournament(submission_id, file)
182
+ self.pre_submit = tournament_results, submission_id, file
 
183
 
184
  def save_pre_submit(self):
185
  if self.pre_submit:
186
+ tournament_results, submission_id, file = self.pre_submit
 
187
  api.upload_file(
188
  path_or_fileobj=file,
189
+ path_in_repo=f"data/{submission_id}.json",
190
  repo_id=self.server_address,
191
  repo_type=self.repo_type,
192
  token=HF_TOKEN,
 
206
  )
207
 
208
  def get_model_detail(self, submission_id):
209
+ path = self.submission_id_to_file.get(submission_id)
210
  if path is None:
211
  raise gr.Error(f"Submission [{submission_id}] not found")
212
  data = json.load(open(path))