Commit bc7fa0c (parent dc5cd2c): Add new features

Changed files:
- app.py (+3 -1)
- src/display/about.py (+2 -0)
- src/display/utils.py (+3 -3)
- src/populate.py (+4 -2)
app.py

```diff
@@ -11,6 +11,7 @@ from src.display.about import (
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
+    TABLE_DESC,
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
@@ -209,6 +210,7 @@ with demo:
                 elem_id="filter-columns-size",
             )
             """
+            gr.Markdown(TABLE_DESC, elem_classes="markdown-text")
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df[
                     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
@@ -305,7 +307,7 @@ with demo:
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
                     precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision", info="What precision are you using for inference?")
-                    hf_model_id = gr.Textbox(label="
+                    hf_model_id = gr.Textbox(label="Model URL")
                     contact_email = gr.Textbox(label="E-Mail")
                     file_output = gr.File()
                     upload_button = gr.UploadButton("Upload json", file_types=['.json'])
```
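For reference, the app.py changes amount to two UI additions: the new TABLE_DESC string is rendered with gr.Markdown directly above the leaderboard Dataframe, and the submission form gains a "Model URL" textbox. A minimal standalone sketch of that layout, with a hypothetical placeholder dataframe standing in for leaderboard_df:

```python
import gradio as gr
import pandas as pd

TABLE_DESC = "The values presented in the table represent the accuracy metric."
# Hypothetical stand-in for leaderboard_df; the real columns come from AutoEvalColumn.
leaderboard_df = pd.DataFrame({"Model": ["example-model"], "agree_cs": [0.75]})

with gr.Blocks() as demo:
    gr.Markdown(TABLE_DESC, elem_classes="markdown-text")  # description shown above the table
    leaderboard_table = gr.components.Dataframe(value=leaderboard_df, interactive=False)
    with gr.Column():
        model_name_textbox = gr.Textbox(label="Model name")
        precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision")
        hf_model_id = gr.Textbox(label="Model URL")  # field added in this commit
        contact_email = gr.Textbox(label="E-Mail")

if __name__ == "__main__":
    demo.launch()
```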
src/display/about.py

```diff
@@ -38,6 +38,8 @@ Czech-Bench is a collection of LLM benchmarks available for the Czech language.
 Czech-Bench is developed by <a href="https://huggingface.co/CIIRC-NLP">CIIRC-NLP</a>.
 """
 
+TABLE_DESC = "The values presented in the table represent the accuracy metric."
+
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## Basic Information
```
src/display/utils.py

```diff
@@ -47,9 +47,9 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sh
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 """
 
-auto_eval_column_dict.append(["
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("
-auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str",
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", False)])
 auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
 auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
 auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
```
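The rewritten append calls rely on the ColumnContent container defined earlier in src/display/utils.py. As a point of reference, in the stock leaderboard template this is a small dataclass whose positional fields are (name, type, displayed_by_default) plus optional flags, and the accumulated list is turned into AutoEvalColumn via make_dataclass; a sketch under that assumption:

```python
from dataclasses import dataclass, make_dataclass

@dataclass
class ColumnContent:
    # Field order assumed from the positional arguments used above.
    name: str                    # header shown in the Dataframe
    type: str                    # Gradio datatype: "str", "number", "markdown", ...
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False   # keep visible regardless of column filters
    dummy: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", False)])

# The [attr_name, type, default] triples become the fields of the AutoEvalColumn dataclass.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
```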
src/populate.py

```diff
@@ -3,7 +3,7 @@ import os
 import numpy as np
 import pandas as pd
 
-from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.formatting import has_no_nan_values, make_clickable_model, model_hyperlink
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
@@ -13,11 +13,13 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     #all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.rename(columns={'eval_name': 'Model', 'precision': 'Precision'})
     df = df[cols].round(decimals=2)
     df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
-
+    df['Model'] = df.apply(lambda row: model_hyperlink(row['hf_model_id'], row['Model']), axis=1)
+
     return raw_data, df
 
 
```
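The newly imported model_hyperlink lives in src/display/formatting.py and is what turns the renamed Model column into clickable links (hence the "markdown" column type in src/display/utils.py). A minimal sketch of the helper and the two added dataframe steps, assuming the template's usual (link, model_name) signature and purely illustrative data:

```python
import pandas as pd

def model_hyperlink(link: str, model_name: str) -> str:
    # Assumed shape of the helper: wrap the display name in an HTML anchor
    # so the "markdown" Model column renders it as a link.
    return f'<a target="_blank" href="{link}">{model_name}</a>'

# Illustrative records standing in for raw_data from get_raw_eval_results.
df = pd.DataFrame([{
    "eval_name": "example-model",
    "precision": "bfloat16",
    "hf_model_id": "https://huggingface.co/org/example-model",
    "agree_cs": 0.7512,
}])

df = df.rename(columns={"eval_name": "Model", "precision": "Precision"})
df["Model"] = df.apply(lambda row: model_hyperlink(row["hf_model_id"], row["Model"]), axis=1)
```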