Update score formula
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -123,20 +123,17 @@ class EvalResult:
|
|
123 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
124 |
scores = {
|
125 |
'naive_judge': self.results.get('naive_judge', 0),
|
126 |
-
'human_eval_solidity_pass_1': self.results.get('human_eval_solidity_pass_1', 0),
|
127 |
'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0),
|
128 |
}
|
129 |
|
130 |
# Set soliditybench score to 0 if HumanEval scores are not present
|
131 |
-
if
|
132 |
-
scores['human_eval_solidity_pass_3'] == 0):
|
133 |
soliditybench = 0
|
134 |
else:
|
135 |
non_zero_scores = {k: v for k, v in scores.items() if v != 0}
|
136 |
weights = {
|
137 |
-
'naive_judge': 0.
|
138 |
-
'
|
139 |
-
'human_eval_solidity_pass_3': 0.4,
|
140 |
}
|
141 |
total_weight = sum(weights[k] for k in non_zero_scores)
|
142 |
soliditybench = sum(
|
|
|
123 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
124 |
scores = {
|
125 |
'naive_judge': self.results.get('naive_judge', 0),
|
|
|
126 |
'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0),
|
127 |
}
|
128 |
|
129 |
# Set soliditybench score to 0 if HumanEval scores are not present
|
130 |
+
if scores['human_eval_solidity_pass_3'] == 0:
|
|
|
131 |
soliditybench = 0
|
132 |
else:
|
133 |
non_zero_scores = {k: v for k, v in scores.items() if v != 0}
|
134 |
weights = {
|
135 |
+
'naive_judge': 0.15,
|
136 |
+
'human_eval_solidity_pass_3': 0.85,
|
|
|
137 |
}
|
138 |
total_weight = sum(weights[k] for k in non_zero_scores)
|
139 |
soliditybench = sum(
|