davidadamczyk committed
Commit bc7fa0c · 1 Parent(s): dc5cd2c

Add new features

Files changed (4)
  1. app.py +3 -1
  2. src/display/about.py +2 -0
  3. src/display/utils.py +3 -3
  4. src/populate.py +4 -2
app.py CHANGED
@@ -11,6 +11,7 @@ from src.display.about import (
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
+    TABLE_DESC,
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
@@ -209,6 +210,7 @@ with demo:
             elem_id="filter-columns-size",
         )
     """
+    gr.Markdown(TABLE_DESC, elem_classes="markdown-text")
     leaderboard_table = gr.components.Dataframe(
         value=leaderboard_df[
             [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
@@ -305,7 +307,7 @@ with demo:
         with gr.Column():
             model_name_textbox = gr.Textbox(label="Model name")
             precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision", info="What precision are you using for inference?")
-            hf_model_id = gr.Textbox(label="Huggingface Model ID")
+            hf_model_id = gr.Textbox(label="Model URL")
             contact_email = gr.Textbox(label="E-Mail")
             file_output = gr.File()
             upload_button = gr.UploadButton("Upload json", file_types=['.json'])
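Taken together, the app.py changes import the new TABLE_DESC string, render it directly above the leaderboard table, and relabel the submission field. A minimal standalone sketch of the affected UI pieces follows; only the labels and arguments mirror the diff, while the Blocks wiring around them is hypothetical:

# Minimal sketch of the components this commit touches. Only the labels and
# arguments come from the diff; the demo wiring around them is assumed.
import gradio as gr

TABLE_DESC = "The values presented in the table represent the accuracy metric."

with gr.Blocks() as demo:
    # Description rendered directly above the leaderboard Dataframe
    gr.Markdown(TABLE_DESC, elem_classes="markdown-text")
    with gr.Column():
        model_name_textbox = gr.Textbox(label="Model name")
        precision = gr.Radio(
            ["bfloat16", "float16", "4bit"],
            label="Precision",
            info="What precision are you using for inference?",
        )
        hf_model_id = gr.Textbox(label="Model URL")  # was "Huggingface Model ID"
        contact_email = gr.Textbox(label="E-Mail")
        file_output = gr.File()
        upload_button = gr.UploadButton("Upload json", file_types=[".json"])

demo.launch()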
src/display/about.py CHANGED
@@ -38,6 +38,8 @@ Czech-Bench is a collection of LLM benchmarks available for the Czech language.
 Czech-Bench is developed by <a href="https://huggingface.co/CIIRC-NLP">CIIRC-NLP</a>.
 """
 
+TABLE_DESC = "The values presented in the table represent the accuracy metric."
+
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## Basic Information
src/display/utils.py CHANGED
@@ -47,9 +47,9 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sh
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 """
 
-auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
-auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", False)])
 auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
 auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
 auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
src/populate.py CHANGED
@@ -3,7 +3,7 @@ import os
 import numpy as np
 import pandas as pd
 
-from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.formatting import has_no_nan_values, make_clickable_model, model_hyperlink
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
@@ -13,11 +13,13 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     #all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.rename(columns={'eval_name': 'Model', 'precision': 'Precision'})
     df = df[cols].round(decimals=2)
     df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
-
+    df['Model'] = df.apply(lambda row: model_hyperlink(row['hf_model_id'], row['Model']), axis=1)
+
     return raw_data, df
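get_leaderboard_df now renames the raw eval_name and precision columns to their display names and turns each Model cell into an HTML link. model_hyperlink is imported rather than defined in this commit; a sketch of what it presumably looks like, based on the stock leaderboard template (the exact markup is an assumption):

# Assumed shape of model_hyperlink in src/display/formatting.py; this commit
# only imports it. Treat the markup as a sketch, not this repo's exact code.
def model_hyperlink(link: str, model_name: str) -> str:
    return f'<a target="_blank" href="{link}">{model_name}</a>'

# Example: a row with hf_model_id "https://huggingface.co/org/model" and
# Model "org/model" becomes a clickable link in the markdown-typed column.

Because the Model column is declared with type "markdown" in src/display/utils.py, the Dataframe renders this HTML as a clickable link, and hf_model_id stays in the frame (hidden) so the lambda can read each row's URL. One pre-existing quirk in the untouched context lines: df.replace(r'\s+', np.nan, regex=True) returns a new DataFrame that is never assigned back, so that line has no effect as written.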