Commented out the `dataset_revision` field in each task entry, since later entries don't have it
Browse files
- functions.py (+4 -4)
functions.py
CHANGED
@@ -45,7 +45,7 @@ def get_task_summary(results):
|
|
45 |
"metric_value": round(results["IFEval"], 2),
|
46 |
"dataset_config": None,
|
47 |
"dataset_split": "train",
|
48 |
-
"dataset_revision": None,
|
49 |
"dataset_args": {"num_few_shot": 0},
|
50 |
"metric_name": "averaged accuracy",
|
51 |
},
|
@@ -56,7 +56,7 @@ def get_task_summary(results):
|
|
56 |
"metric_value": round(results["BBH"], 2),
|
57 |
"dataset_config": None,
|
58 |
"dataset_split": "test",
|
59 |
-
"dataset_revision": None,
|
60 |
"dataset_args": {"num_few_shot": 3},
|
61 |
"metric_name": "normalized accuracy",
|
62 |
},
|
@@ -67,7 +67,7 @@ def get_task_summary(results):
|
|
67 |
"metric_value": round(results["MATH Lvl 5"], 2),
|
68 |
"dataset_config": None,
|
69 |
"dataset_split": "test",
|
70 |
-
"dataset_revision": None,
|
71 |
"dataset_args": {"num_few_shot": 4},
|
72 |
"metric_name": "exact match",
|
73 |
},
|
@@ -78,7 +78,7 @@ def get_task_summary(results):
|
|
78 |
"metric_value": round(results["GPQA"], 2),
|
79 |
"dataset_config": None,
|
80 |
"dataset_split": "train",
|
81 |
-
"dataset_revision": None,
|
82 |
"dataset_args": {"num_few_shot": 0},
|
83 |
"metric_name": "acc_norm",
|
84 |
},
|
|
|
45 |
"metric_value": round(results["IFEval"], 2),
|
46 |
"dataset_config": None,
|
47 |
"dataset_split": "train",
|
48 |
+
#"dataset_revision": None,
|
49 |
"dataset_args": {"num_few_shot": 0},
|
50 |
"metric_name": "averaged accuracy",
|
51 |
},
|
|
|
56 |
"metric_value": round(results["BBH"], 2),
|
57 |
"dataset_config": None,
|
58 |
"dataset_split": "test",
|
59 |
+
#"dataset_revision": None,
|
60 |
"dataset_args": {"num_few_shot": 3},
|
61 |
"metric_name": "normalized accuracy",
|
62 |
},
|
|
|
67 |
"metric_value": round(results["MATH Lvl 5"], 2),
|
68 |
"dataset_config": None,
|
69 |
"dataset_split": "test",
|
70 |
+
#"dataset_revision": None,
|
71 |
"dataset_args": {"num_few_shot": 4},
|
72 |
"metric_name": "exact match",
|
73 |
},
|
|
|
78 |
"metric_value": round(results["GPQA"], 2),
|
79 |
"dataset_config": None,
|
80 |
"dataset_split": "train",
|
81 |
+
#"dataset_revision": None,
|
82 |
"dataset_args": {"num_few_shot": 0},
|
83 |
"metric_name": "acc_norm",
|
84 |
},
|