feat: switch the default metric to ndcg_at_10
Files changed:
- app.py +2 -2
- tests/src/leaderboard/test_read_evals.py +1 -1
app.py CHANGED
@@ -31,9 +31,9 @@ except Exception:
 raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
 
 original_df_qa = get_leaderboard_df(
-    raw_data, task='qa', metric='
+    raw_data, task='qa', metric='ndcg_at_10')
 original_df_long_doc = get_leaderboard_df(
-    raw_data, task='long-doc', metric='
+    raw_data, task='long-doc', metric='ndcg_at_10')
 print(f'raw data: {len(raw_data)}')
 print(f'QA data loaded: {original_df_qa.shape}')
 print(f'Long-Doc data loaded: {len(original_df_long_doc)}')
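For context, nDCG@10 (normalized discounted cumulative gain, cut off at rank 10) is a standard ranking-quality metric for retrieval benchmarks such as AIR-Bench. The sketch below illustrates how a get_leaderboard_df-style helper could pick out per-dataset scores for the requested metric; the raw_data record layout and the one-column-per-dataset naming are assumptions for illustration, not this Space's actual implementation.

# Hypothetical sketch only: the shape of raw_data and the column naming
# are assumptions; the real get_leaderboard_df in this Space may differ.
from typing import Any

import pandas as pd


def get_leaderboard_df_sketch(
    raw_data: list[dict[str, Any]], task: str, metric: str = "ndcg_at_10"
) -> pd.DataFrame:
    rows = []
    for result in raw_data:
        # Keep only results for the requested task ('qa' or 'long-doc').
        if result.get("task") != task:
            continue
        row = {"model": result.get("model_name", "unknown")}
        # Copy over only the scores recorded under the requested metric,
        # one column per evaluation dataset.
        for dataset, scores in result.get("scores", {}).items():
            if metric in scores:
                row[dataset] = scores[metric]
        rows.append(row)
    return pd.DataFrame(rows)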
tests/src/leaderboard/test_read_evals.py CHANGED
@@ -41,7 +41,7 @@ def test_get_raw_eval_results():
 def test_get_leaderboard_df():
     results_path = cur_fp.parents[2] / "toydata" / "eval_results" / "AIR-Bench_24.04"
     raw_data = get_raw_eval_results(results_path)
-    df = get_leaderboard_df(raw_data, 'qa', '
+    df = get_leaderboard_df(raw_data, 'qa', 'ndcg_at_10')
     assert df.shape[0] == 4
     # the results contain only one embedding model
     # for i in range(4):
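If more metrics need coverage later, the test could be parametrized rather than hard-coding one metric name. A minimal pytest sketch, assuming it lives in the same test module so cur_fp, get_raw_eval_results, and get_leaderboard_df are already available:

import pytest

# Parametrized variant of the test above. Additional metric names could be
# appended to the list if the toy eval results contain them (assumed, not
# verified here); only 'ndcg_at_10' is confirmed by this commit.
@pytest.mark.parametrize("metric", ["ndcg_at_10"])
def test_get_leaderboard_df_by_metric(metric):
    results_path = cur_fp.parents[2] / "toydata" / "eval_results" / "AIR-Bench_24.04"
    raw_data = get_raw_eval_results(results_path)
    df = get_leaderboard_df(raw_data, 'qa', metric)
    assert df.shape[0] == 4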