brunneis committed on
Commit
12501d0
·
unverified ·
1 Parent(s): 8a1fb40

Update score calc

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +13 -8
src/leaderboard/read_evals.py CHANGED
@@ -124,22 +124,27 @@ class EvalResult:
124
  scores = {
125
  'naive_judge': self.results.get('naive_judge', 0),
126
  'human_eval_solidity_pass_1': self.results.get('human_eval_solidity_pass_1', 0),
127
- 'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
128
  }
129
-
130
- soliditybench = 0
131
- non_zero_scores = {k: v for k, v in scores.items() if v != 0}
132
- if non_zero_scores:
 
 
 
133
  weights = {
134
  'naive_judge': 0.1,
135
  'human_eval_solidity_pass_1': 0.5,
136
- 'human_eval_solidity_pass_3': 0.4
137
  }
138
  total_weight = sum(weights[k] for k in non_zero_scores)
139
- soliditybench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
 
 
140
 
141
  data_dict = {
142
- "eval_name": self.eval_name, # not a column, just a save name,
143
  AutoEvalColumn.precision.name: self.precision.value.name,
144
  AutoEvalColumn.model_type.name: self.model_type.value.name,
145
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
 
124
  scores = {
125
  'naive_judge': self.results.get('naive_judge', 0),
126
  'human_eval_solidity_pass_1': self.results.get('human_eval_solidity_pass_1', 0),
127
+ 'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0),
128
  }
129
+
130
+ # Set soliditybench score to 0 if HumanEval scores are not present
131
+ if (scores['human_eval_solidity_pass_1'] == 0 and
132
+ scores['human_eval_solidity_pass_3'] == 0):
133
+ soliditybench = 0
134
+ else:
135
+ non_zero_scores = {k: v for k, v in scores.items() if v != 0}
136
  weights = {
137
  'naive_judge': 0.1,
138
  'human_eval_solidity_pass_1': 0.5,
139
+ 'human_eval_solidity_pass_3': 0.4,
140
  }
141
  total_weight = sum(weights[k] for k in non_zero_scores)
142
+ soliditybench = sum(
143
+ scores[k] * weights[k] / total_weight for k in non_zero_scores
144
+ )
145
 
146
  data_dict = {
147
+ 'eval_name': self.eval_name, # not a column, just a save name
148
  AutoEvalColumn.precision.name: self.precision.value.name,
149
  AutoEvalColumn.model_type.name: self.model_type.value.name,
150
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,