djstrong committed on
Commit
d6e3be2
•
1 Parent(s): a8630b1

0-shot description

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. src/about.py +1 -1
  3. src/leaderboard/read_evals.py +2 -1
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Demo Leaderboard
3
- emoji: 🥇
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.4.0
8
  app_file: app.py
 
1
  ---
2
+ title: Open PL LLM Leaderboard
3
+ emoji: 🏆🇵🇱
4
+ colorFrom: gray
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 4.4.0
8
  app_file: app.py
src/about.py CHANGED
@@ -37,7 +37,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
37
 
38
 
39
  # Your leaderboard name
40
- TITLE = """<h1 align="center" id="space-title">Open PL LLM Leaderboard</h1>"""
41
 
42
  # What does your leaderboard evaluate?
43
  INTRODUCTION_TEXT = """
 
37
 
38
 
39
  # Your leaderboard name
40
+ TITLE = """<h1 align="center" id="space-title">Open PL LLM Leaderboard (0-shot)</h1>"""
41
 
42
  # What does your leaderboard evaluate?
43
  INTRODUCTION_TEXT = """
src/leaderboard/read_evals.py CHANGED
@@ -12,6 +12,7 @@ from src.display.formatting import make_clickable_model
12
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
13
  from src.submission.check_validity import is_model_on_hub
14
 
 
15
 
16
  @dataclass
17
  class EvalResult:
@@ -73,7 +74,7 @@ class EvalResult:
73
  task = task.value
74
 
75
  # We average all scores of a given metric (not all metrics are present in all files)
76
- accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == 0])
77
  if accs.size == 0 or any([acc is None for acc in accs]):
78
  continue
79
 
 
12
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
13
  from src.submission.check_validity import is_model_on_hub
14
 
15
+ NUM_FEWSHOT = 0
16
 
17
  @dataclass
18
  class EvalResult:
 
74
  task = task.value
75
 
76
  # We average all scores of a given metric (not all metrics are present in all files)
77
+ accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == NUM_FEWSHOT])
78
  if accs.size == 0 or any([acc is None for acc in accs]):
79
  continue
80