tdoehmen committed on
Commit
6fdb323
1 Parent(s): 0e01bbd

output formatting

Browse files
Files changed (1) hide show
  1. evaluation_logic.py +4 -10
evaluation_logic.py CHANGED
@@ -166,23 +166,17 @@ def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgranitesh
166
  if metrics:
167
  yield "Overall Results:"
168
  overall_metrics = metrics['exec']['all']
169
- yield f"Count: {overall_metrics['count']}"
170
- yield f"Execution Accuracy: {overall_metrics['exec']:.3f}"
171
- yield f"Exact Match Accuracy: {overall_metrics['exact']:.3f}"
172
- yield f"Equality: {metrics['equality']['equality']:.3f}"
173
- yield f"Edit Distance: {metrics['edit_distance']['edit_distance']:.3f}"
174
 
175
- yield "\nResults by Category:"
176
  categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
177
 
178
  for category in categories:
179
  if category in metrics['exec']:
180
- yield f"\n{category}:"
181
  category_metrics = metrics['exec'][category]
182
- yield f"Count: {category_metrics['count']}"
183
- yield f"Execution Accuracy: {category_metrics['exec']:.3f}"
184
  else:
185
- yield f"\n{category}: No data available"
186
  else:
187
  yield "No evaluation metrics returned."
188
  except Exception as e:
 
166
  if metrics:
167
  yield "Overall Results:"
168
  overall_metrics = metrics['exec']['all']
169
+ yield f"All (n={overall_metrics['count']}) - Execution Accuracy: {overall_metrics['exec']:.3f}"
170
+ yield f"All (n={overall_metrics['count']}) - Edit Distance: {metrics['edit_distance']['edit_distance']:.3f}"
 
 
 
171
 
 
172
  categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
173
 
174
  for category in categories:
175
  if category in metrics['exec']:
 
176
  category_metrics = metrics['exec'][category]
177
+ yield f"{category} (n={category_metrics['count']}) - Execution Accuracy: {category_metrics['exec']:.3f}"
 
178
  else:
179
+ yield f"{category}: No data available"
180
  else:
181
  yield "No evaluation metrics returned."
182
  except Exception as e: