brunneis committed on
Commit
b735ffe
·
unverified ·
1 Parent(s): 12501d0

Update score formula

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +3 -6
src/leaderboard/read_evals.py CHANGED
@@ -123,20 +123,17 @@ class EvalResult:
123
  # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
124
  scores = {
125
  'naive_judge': self.results.get('naive_judge', 0),
126
- 'human_eval_solidity_pass_1': self.results.get('human_eval_solidity_pass_1', 0),
127
  'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0),
128
  }
129
 
130
  # Set soliditybench score to 0 if HumanEval scores are not present
131
- if (scores['human_eval_solidity_pass_1'] == 0 and
132
- scores['human_eval_solidity_pass_3'] == 0):
133
  soliditybench = 0
134
  else:
135
  non_zero_scores = {k: v for k, v in scores.items() if v != 0}
136
  weights = {
137
- 'naive_judge': 0.1,
138
- 'human_eval_solidity_pass_1': 0.5,
139
- 'human_eval_solidity_pass_3': 0.4,
140
  }
141
  total_weight = sum(weights[k] for k in non_zero_scores)
142
  soliditybench = sum(
 
123
  # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
124
  scores = {
125
  'naive_judge': self.results.get('naive_judge', 0),
 
126
  'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0),
127
  }
128
 
129
  # Set soliditybench score to 0 if HumanEval scores are not present
130
+ if scores['human_eval_solidity_pass_3'] == 0:
 
131
  soliditybench = 0
132
  else:
133
  non_zero_scores = {k: v for k, v in scores.items() if v != 0}
134
  weights = {
135
+ 'naive_judge': 0.15,
136
+ 'human_eval_solidity_pass_3': 0.85,
 
137
  }
138
  total_weight = sum(weights[k] for k in non_zero_scores)
139
  soliditybench = sum(