Spaces:
Restarting
Restarting
reject duplicate submission
Browse files
app.py
CHANGED
@@ -15,7 +15,21 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
|
|
15 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
16 |
IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
repo = None
|
|
|
19 |
if H4_TOKEN:
|
20 |
print("pulling repo")
|
21 |
# try:
|
@@ -31,6 +45,9 @@ if H4_TOKEN:
|
|
31 |
)
|
32 |
repo.git_pull()
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
# parse the results
|
36 |
BENCHMARKS = ["arc_challenge", "hellaswag", "hendrycks", "truthfulqa_mc"]
|
@@ -110,7 +127,7 @@ def get_leaderboard():
|
|
110 |
|
111 |
dataframe = pd.DataFrame.from_records(all_data)
|
112 |
dataframe = dataframe.sort_values(by=["Average ⬆️"], ascending=False)
|
113 |
-
print(dataframe)
|
114 |
dataframe = dataframe[COLS]
|
115 |
return dataframe
|
116 |
|
@@ -187,12 +204,12 @@ def add_new_eval(
|
|
187 |
if is_delta_weight and not is_model_on_hub(base_model, revision):
|
188 |
error_message = f'Base model "{base_model}" was not found on hub!'
|
189 |
print(error_message)
|
190 |
-
return f"<p style='color: red; font-size:
|
191 |
|
192 |
if not is_model_on_hub(model, revision):
|
193 |
error_message = f'Model "{model}"was not found on hub!'
|
194 |
print(error_message)
|
195 |
-
return f"<p style='color: red; font-size:
|
196 |
|
197 |
print("adding new eval")
|
198 |
|
@@ -216,6 +233,11 @@ def add_new_eval(
|
|
216 |
os.makedirs(OUT_DIR, exist_ok=True)
|
217 |
out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
|
218 |
|
|
|
|
|
|
|
|
|
|
|
219 |
with open(out_path, "w") as f:
|
220 |
f.write(json.dumps(eval_entry))
|
221 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
@@ -230,7 +252,7 @@ def add_new_eval(
|
|
230 |
)
|
231 |
|
232 |
success_message = "Your request has been submitted to the evaluation queue!"
|
233 |
-
return f"<p style='color: green; font-size:
|
234 |
|
235 |
|
236 |
def refresh():
|
|
|
15 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
16 |
IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
|
17 |
|
18 |
+
|
19 |
+
def get_all_requested_models(requested_models_dir):
    """Return the lower-cased paths of files exactly one directory below ``requested_models_dir``.

    Files sitting directly in ``requested_models_dir`` and files nested more
    than one directory deep are ignored.

    Each returned path is lower-cased and has everything up to and including
    the ``"./evals/"`` prefix removed, so ``"./evals/eval_requests"`` yields
    entries such as ``"eval_requests/<sub_dir>/<file>"``. When the path does
    not contain ``"./evals/"`` (for example ``"evals/eval_requests"``), the
    entry is made relative to the parent of ``requested_models_dir`` instead
    of raising ``IndexError``.

    Args:
        requested_models_dir: Directory to scan. A trailing path separator is
            tolerated.

    Returns:
        A set of lower-cased path strings; empty when the directory does not
        exist or holds no files at the expected depth.
    """
    depth = 1
    marker = "./evals/"

    # A trailing separator would inflate the base separator count and make the
    # depth comparison below select files one level too deep.
    base_dir = requested_models_dir.rstrip(os.sep) or requested_models_dir
    base_sep_count = base_dir.count(os.sep)
    parent_dir = os.path.dirname(base_dir)

    requested = set()
    for root, _dirs, files in os.walk(base_dir):
        if root.count(os.sep) - base_sep_count != depth:
            continue
        for file in files:
            file_name = os.path.join(root, file)
            _, found, tail = file_name.lower().partition(marker)
            if not found:
                # No "./evals/" prefix: fall back to the path relative to the
                # parent of the scanned directory, which has the same shape.
                tail = file_name[len(parent_dir):].lstrip(os.sep).lower()
            requested.add(tail)
    return requested
|
29 |
+
|
30 |
+
|
31 |
repo = None
|
32 |
+
requested_models = None
|
33 |
if H4_TOKEN:
|
34 |
print("pulling repo")
|
35 |
# try:
|
|
|
45 |
)
|
46 |
repo.git_pull()
|
47 |
|
48 |
+
requested_models_dir = "./evals/eval_requests"
|
49 |
+
requested_models = get_all_requested_models(requested_models_dir)
|
50 |
+
|
51 |
|
52 |
# parse the results
|
53 |
BENCHMARKS = ["arc_challenge", "hellaswag", "hendrycks", "truthfulqa_mc"]
|
|
|
127 |
|
128 |
dataframe = pd.DataFrame.from_records(all_data)
|
129 |
dataframe = dataframe.sort_values(by=["Average ⬆️"], ascending=False)
|
130 |
+
# print(dataframe)
|
131 |
dataframe = dataframe[COLS]
|
132 |
return dataframe
|
133 |
|
|
|
204 |
if is_delta_weight and not is_model_on_hub(base_model, revision):
|
205 |
error_message = f'Base model "{base_model}" was not found on hub!'
|
206 |
print(error_message)
|
207 |
+
return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
|
208 |
|
209 |
if not is_model_on_hub(model, revision):
|
210 |
error_message = f'Model "{model}"was not found on hub!'
|
211 |
print(error_message)
|
212 |
+
return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
|
213 |
|
214 |
print("adding new eval")
|
215 |
|
|
|
233 |
os.makedirs(OUT_DIR, exist_ok=True)
|
234 |
out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
|
235 |
|
236 |
+
# Check for duplicate submission
|
237 |
+
if out_path.lower() in requested_models:
|
238 |
+
duplicate_request_message = "This model has been already submitted."
|
239 |
+
return f"<p style='color: orange; font-size: 20px; text-align: center;'>{duplicate_request_message}</p>"
|
240 |
+
|
241 |
with open(out_path, "w") as f:
|
242 |
f.write(json.dumps(eval_entry))
|
243 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
|
|
252 |
)
|
253 |
|
254 |
success_message = "Your request has been submitted to the evaluation queue!"
|
255 |
+
return f"<p style='color: green; font-size: 20px; text-align: center;'>{success_message}</p>"
|
256 |
|
257 |
|
258 |
def refresh():
|
utils.py
CHANGED
@@ -133,4 +133,4 @@ def get_eval_results_dicts(is_public=True) -> List[Dict]:
|
|
133 |
return [e.to_dict() for e in eval_results]
|
134 |
|
135 |
eval_results_dict = get_eval_results_dicts()
|
136 |
-
print(eval_results_dict)
|
|
|
133 |
return [e.to_dict() for e in eval_results]
|
134 |
|
135 |
eval_results_dict = get_eval_results_dicts()
|
136 |
+
# print(eval_results_dict)
|