Spaces:
Sleeping
Sleeping
Use latest result per model
Browse files
app.py
CHANGED
@@ -11,32 +11,48 @@ RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
|
|
11 |
fs = HfFileSystem()
|
12 |
|
13 |
|
14 |
-
def
|
15 |
-
|
16 |
-
results = [file[len(RESULTS_DATASET_ID) +1:] for file in files]
|
17 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
def load_result(result_path) -> pd.DataFrame:
|
21 |
-
with fs.open(
|
22 |
data = json.load(f)
|
23 |
model_name = data.get("model_name", "Model")
|
24 |
df = pd.json_normalize([data])
|
25 |
return df.iloc[0].rename_axis("Parameters").rename(model_name).to_frame() # .reset_index()
|
26 |
|
27 |
|
28 |
-
def render_result_1(
|
|
|
29 |
result = load_result(result_path)
|
30 |
return pd.concat([result, results.iloc[:, [0, 2]].set_index("Parameters")], axis=1).reset_index()
|
31 |
|
32 |
|
33 |
-
def render_result_2(
|
|
|
34 |
result = load_result(result_path)
|
35 |
return pd.concat([results.iloc[:, [0, 1]].set_index("Parameters"), result], axis=1).reset_index()
|
36 |
|
37 |
|
38 |
if __name__ == "__main__":
|
39 |
-
|
40 |
|
41 |
with gr.Blocks(fill_height=True) as demo:
|
42 |
gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
|
@@ -44,10 +60,10 @@ if __name__ == "__main__":
|
|
44 |
|
45 |
with gr.Row():
|
46 |
with gr.Column():
|
47 |
-
|
48 |
load_btn_1 = gr.Button("Load")
|
49 |
with gr.Column():
|
50 |
-
|
51 |
load_btn_2 = gr.Button("Load")
|
52 |
|
53 |
with gr.Row():
|
@@ -61,12 +77,12 @@ if __name__ == "__main__":
|
|
61 |
|
62 |
load_btn_1.click(
|
63 |
fn=render_result_1,
|
64 |
-
inputs=[
|
65 |
outputs=compared_results,
|
66 |
)
|
67 |
load_btn_2.click(
|
68 |
fn=render_result_2,
|
69 |
-
inputs=[
|
70 |
outputs=compared_results,
|
71 |
)
|
72 |
|
|
|
11 |
fs = HfFileSystem()
|
12 |
|
13 |
|
14 |
+
def fetch_result_paths():
    """List the JSON result files stored under ``RESULTS_DATASET_ID``.

    Returns:
        Whatever ``fs.glob`` yields for the pattern, unmodified.
    """
    pattern = f"{RESULTS_DATASET_ID}/**/**/*.json"
    return fs.glob(pattern)
|
18 |
+
|
19 |
+
|
20 |
+
def filter_latest_result_path_per_model(paths):
    """Keep a single result path per model: the greatest one in string order.

    The model ID is the part of each path that follows the
    ``RESULTS_DATASET_ID`` prefix, up to (not including) the final "/".

    NOTE(review): presumably the file names embed a sortable timestamp, so
    the greatest string is also the most recent result -- confirm.

    Args:
        paths: Iterable of result file paths starting with ``RESULTS_DATASET_ID``.

    Returns:
        dict mapping each model ID to its greatest path.
    """
    # Skip the dataset ID plus the "/" that follows it.
    prefix_len = len(RESULTS_DATASET_ID) + 1
    latest = {}
    for candidate in paths:
        model_id, _ = candidate[prefix_len:].rsplit("/", 1)
        current = latest.get(model_id)
        if current is None or candidate > current:
            latest[model_id] = candidate
    return latest
|
28 |
+
|
29 |
+
|
30 |
+
def get_result_path_from_model(model_id, result_path_per_model):
    """Look up the result path registered for ``model_id``.

    Args:
        model_id: Key to look up.
        result_path_per_model: Mapping from model ID to result path.

    Returns:
        The value stored under ``model_id``.

    Raises:
        KeyError: If ``model_id`` is not a key of the mapping.
    """
    result_path = result_path_per_model[model_id]
    return result_path
|
32 |
|
33 |
|
34 |
def load_result(result_path) -> pd.DataFrame:
    """Read one JSON result file into a single-column DataFrame.

    The JSON document is flattened with ``pd.json_normalize``; its only row
    becomes a column whose index is named "Parameters" and whose label is
    the document's "model_name" entry ("Model" when that key is absent).

    Args:
        result_path: Path of the JSON file, opened through ``fs``.

    Returns:
        A one-column DataFrame indexed by "Parameters".
    """
    with fs.open(result_path, "r") as handle:
        payload = json.load(handle)
    column_label = payload.get("model_name", "Model")
    first_row = pd.json_normalize([payload]).iloc[0]
    labelled = first_row.rename_axis("Parameters").rename(column_label)
    return labelled.to_frame()
|
40 |
|
41 |
|
42 |
+
def render_result_1(model_id, results):
    """Rebuild the comparison table with ``model_id``'s result in the first slot.

    Loads the path recorded for ``model_id`` in the module-level
    ``latest_result_path_per_model`` and places it ahead of the columns at
    positions 0 and 2 of ``results`` (re-indexed by "Parameters").

    Args:
        model_id: Model whose latest result should be loaded.
        results: Current comparison DataFrame; must contain a "Parameters"
            column among the columns at positions 0 and 2.

    Returns:
        The combined DataFrame with "Parameters" restored as a regular column.
    """
    loaded = load_result(get_result_path_from_model(model_id, latest_result_path_per_model))
    kept = results.iloc[:, [0, 2]].set_index("Parameters")
    combined = pd.concat([loaded, kept], axis=1)
    return combined.reset_index()
|
46 |
|
47 |
|
48 |
+
def render_result_2(model_id, results):
    """Rebuild the comparison table with ``model_id``'s result in the second slot.

    Loads the path recorded for ``model_id`` in the module-level
    ``latest_result_path_per_model`` and places it after the columns at
    positions 0 and 1 of ``results`` (re-indexed by "Parameters").

    Args:
        model_id: Model whose latest result should be loaded.
        results: Current comparison DataFrame; must contain a "Parameters"
            column among the columns at positions 0 and 1.

    Returns:
        The combined DataFrame with "Parameters" restored as a regular column.
    """
    loaded = load_result(get_result_path_from_model(model_id, latest_result_path_per_model))
    kept = results.iloc[:, [0, 1]].set_index("Parameters")
    combined = pd.concat([kept, loaded], axis=1)
    return combined.reset_index()
|
52 |
|
53 |
|
54 |
if __name__ == "__main__":
|
55 |
+
latest_result_path_per_model = filter_latest_result_path_per_model(fetch_result_paths())
|
56 |
|
57 |
with gr.Blocks(fill_height=True) as demo:
|
58 |
gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
|
|
|
60 |
|
61 |
with gr.Row():
|
62 |
with gr.Column():
|
63 |
+
model_id_1 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
|
64 |
load_btn_1 = gr.Button("Load")
|
65 |
with gr.Column():
|
66 |
+
model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
|
67 |
load_btn_2 = gr.Button("Load")
|
68 |
|
69 |
with gr.Row():
|
|
|
77 |
|
78 |
load_btn_1.click(
|
79 |
fn=render_result_1,
|
80 |
+
inputs=[model_id_1, compared_results],
|
81 |
outputs=compared_results,
|
82 |
)
|
83 |
load_btn_2.click(
|
84 |
fn=render_result_2,
|
85 |
+
inputs=[model_id_2, compared_results],
|
86 |
outputs=compared_results,
|
87 |
)
|
88 |
|