Commit bc7fa0c (parent dc5cd2c): Add new features

Changed files:
- app.py (+3 -1)
- src/display/about.py (+2 -0)
- src/display/utils.py (+3 -3)
- src/populate.py (+4 -2)
app.py

```diff
@@ -11,6 +11,7 @@ from src.display.about import (
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
+    TABLE_DESC,
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
@@ -209,6 +210,7 @@ with demo:
                 elem_id="filter-columns-size",
             )
             """
+            gr.Markdown(TABLE_DESC, elem_classes="markdown-text")
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df[
                     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
@@ -305,7 +307,7 @@ with demo:
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
                     precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision", info="What precision are you using for inference?")
-                    hf_model_id = gr.Textbox(label="
+                    hf_model_id = gr.Textbox(label="Model URL")
                     contact_email = gr.Textbox(label="E-Mail")
                     file_output = gr.File()
                     upload_button = gr.UploadButton("Upload json", file_types=['.json'])
```
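For reference, the app.py changes amount to two UI additions: the new TABLE_DESC string is rendered with gr.Markdown directly above the leaderboard Dataframe, and the submission form gains a "Model URL" textbox. A minimal standalone sketch of that layout, with a hypothetical placeholder dataframe standing in for leaderboard_df:

```python
import gradio as gr
import pandas as pd

TABLE_DESC = "The values presented in the table represent the accuracy metric."
# Hypothetical stand-in for leaderboard_df; the real columns come from AutoEvalColumn.
leaderboard_df = pd.DataFrame({"Model": ["example-model"], "agree_cs": [0.75]})

with gr.Blocks() as demo:
    gr.Markdown(TABLE_DESC, elem_classes="markdown-text")  # description shown above the table
    leaderboard_table = gr.components.Dataframe(value=leaderboard_df, interactive=False)
    with gr.Column():
        model_name_textbox = gr.Textbox(label="Model name")
        precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision")
        hf_model_id = gr.Textbox(label="Model URL")  # field added in this commit
        contact_email = gr.Textbox(label="E-Mail")

if __name__ == "__main__":
    demo.launch()
```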
src/display/about.py

```diff
@@ -38,6 +38,8 @@ Czech-Bench is a collection of LLM benchmarks available for the Czech language.
 Czech-Bench is developed by <a href="https://huggingface.co/CIIRC-NLP">CIIRC-NLP</a>.
 """
 
+TABLE_DESC = "The values presented in the table represent the accuracy metric."
+
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## Basic Information
```
src/display/utils.py

```diff
@@ -47,9 +47,9 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sh
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 """
 
-auto_eval_column_dict.append(["
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("
-auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str",
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", False)])
 auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
 auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
 auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
```
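The rewritten append calls rely on the ColumnContent container defined earlier in src/display/utils.py. As a point of reference, in the stock leaderboard template this is a small dataclass whose positional fields are (name, type, displayed_by_default) plus optional flags, and the accumulated list is turned into AutoEvalColumn via make_dataclass; a sketch under that assumption:

```python
from dataclasses import dataclass, make_dataclass

@dataclass
class ColumnContent:
    # Field order assumed from the positional arguments used above.
    name: str                    # header shown in the Dataframe
    type: str                    # Gradio datatype: "str", "number", "markdown", ...
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False   # keep visible regardless of column filters
    dummy: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", False)])

# The [attr_name, type, default] triples become the fields of the AutoEvalColumn dataclass.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
```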
src/populate.py

```diff
@@ -3,7 +3,7 @@ import os
 import numpy as np
 import pandas as pd
 
-from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.formatting import has_no_nan_values, make_clickable_model, model_hyperlink
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
@@ -13,11 +13,13 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     #all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.rename(columns={'eval_name': 'Model', 'precision': 'Precision'})
     df = df[cols].round(decimals=2)
     df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
-
+    df['Model'] = df.apply(lambda row: model_hyperlink(row['hf_model_id'], row['Model']), axis=1)
+
     return raw_data, df
 
 
```
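The newly imported model_hyperlink lives in src/display/formatting.py and is what turns the renamed Model column into clickable links (hence the "markdown" column type in src/display/utils.py). A minimal sketch of the helper and the two added dataframe steps, assuming the template's usual (link, model_name) signature and purely illustrative data:

```python
import pandas as pd

def model_hyperlink(link: str, model_name: str) -> str:
    # Assumed shape of the helper: wrap the display name in an HTML anchor
    # so the "markdown" Model column renders it as a link.
    return f'<a target="_blank" href="{link}">{model_name}</a>'

# Illustrative records standing in for raw_data from get_raw_eval_results.
df = pd.DataFrame([{
    "eval_name": "example-model",
    "precision": "bfloat16",
    "hf_model_id": "https://huggingface.co/org/example-model",
    "agree_cs": 0.7512,
}])

df = df.rename(columns={"eval_name": "Model", "precision": "Precision"})
df["Model"] = df.apply(lambda row: model_hyperlink(row["hf_model_id"], row["Model"]), axis=1)
```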