loodvanniekerkginkgo committed on
Commit
de9585b
·
1 Parent(s): 58db0a0

Hacky but refreshing works. Now to simplify

Browse files
Files changed (2) hide show
  1. app.py +29 -13
  2. utils.py +4 -0
app.py CHANGED
@@ -15,12 +15,7 @@ from constants import (
15
  from about import ABOUT_TEXT, FAQS
16
  from submit import make_submission
17
 
18
-
19
  def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
20
- # Previous things that were nice in the constellaration leaderboard:
21
- # Having a submission time column, and a user column where the username is clickable (this is a pro for usability but con for anonymity)
22
- # full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
23
- # to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)
24
  df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
25
  if assay is not None:
26
  df = df[df["assay"] == assay]
@@ -34,19 +29,34 @@ def get_leaderboard_object(assay: str | None = None):
34
  filter_columns.append("property")
35
  # TODO how to sort filter columns alphabetically?
36
  # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
37
- Leaderboard(
38
- # TODO(Lood) check that this actually refreshes using the function
39
- value=format_leaderboard_table(df_results=fetch_hf_results(), assay=assay),
 
40
  datatype=["str", "str", "str", "number"],
41
  select_columns=["model", "property", "spearman", "dataset"],
42
- search_columns=["model"], # Note(Lood): Would be nice to make this clear it's searching on model name
43
  filter_columns=filter_columns,
44
- every=60,
45
  render=True,
46
  )
 
 
47
 
 
 
48
 
49
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
50
  # TODO: Add Ginkgo logo here on the top right
51
  gr.Markdown("""
52
  ## Welcome to the Ginkgo Antibody Developability Benchmark!
@@ -73,19 +83,25 @@ with gr.Blocks() as demo:
73
  gr.Markdown(f"*{answer}*") # Italics for answers
74
 
75
  # Procedurally make these 5 tabs
76
- for assay in ASSAY_LIST:
 
77
  with gr.TabItem(
78
  f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
79
  elem_id="abdev-benchmark-tab-table",
80
  ):
81
  gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
82
- get_leaderboard_object(assay=assay)
 
 
83
 
84
  with gr.TabItem("🚀 Overall", elem_id="abdev-benchmark-tab-table"):
85
  gr.Markdown(
86
  "# Antibody Developability Benchmark Leaderboard over all properties"
87
  )
88
- get_leaderboard_object()
 
 
 
89
 
90
  with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
91
  gr.Markdown(
 
15
  from about import ABOUT_TEXT, FAQS
16
  from submit import make_submission
17
 
 
18
  def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
 
 
 
 
19
  df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
20
  if assay is not None:
21
  df = df[df["assay"] == assay]
 
29
  filter_columns.append("property")
30
  # TODO how to sort filter columns alphabetically?
31
  # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
32
+ # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
33
+ # TODO(Lood) check that this actually refreshes using the function
34
+ lb = Leaderboard(
35
+ value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
36
  datatype=["str", "str", "str", "number"],
37
  select_columns=["model", "property", "spearman", "dataset"],
38
+ search_columns=["model"],
39
  filter_columns=filter_columns,
40
+ every=15,
41
  render=True,
42
  )
43
+ return lb
44
+
45
 
46
+ # Initialize global dataframe
47
+ current_dataframe = fetch_hf_results()
48
 
49
  with gr.Blocks() as demo:
50
+ timer = gr.Timer(10)
51
+
52
+ def update_current_dataframe():
53
+ global current_dataframe
54
+ current_dataframe = fetch_hf_results()
55
+ # Don't return anything, just update the global dataframe
56
+
57
+ timer.tick(fn=update_current_dataframe) # Keep this up to date, all leaderboard objects will use this
58
+ timers = [gr.Timer(10) for _ in range(6)] # One timer for each tab
59
+
60
  # TODO: Add Ginkgo logo here on the top right
61
  gr.Markdown("""
62
  ## Welcome to the Ginkgo Antibody Developability Benchmark!
 
83
  gr.Markdown(f"*{answer}*") # Italics for answers
84
 
85
  # Procedurally make these 5 tabs
86
+ leaderboards = []
87
+ for i, assay in enumerate(ASSAY_LIST):
88
  with gr.TabItem(
89
  f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
90
  elem_id="abdev-benchmark-tab-table",
91
  ):
92
  gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
93
+ lb = get_leaderboard_object(assay=assay)
94
+ leaderboards.append(lb)
95
+ timers[i].tick(fn=lambda _, assay=assay: format_leaderboard_table(df_results=current_dataframe, assay=assay), inputs=leaderboards[i], outputs=leaderboards[i])
96
 
97
  with gr.TabItem("🚀 Overall", elem_id="abdev-benchmark-tab-table"):
98
  gr.Markdown(
99
  "# Antibody Developability Benchmark Leaderboard over all properties"
100
  )
101
+ lb = get_leaderboard_object()
102
+ leaderboards.append(lb)
103
+ # Replace the value every 10 seconds
104
+ timers[5].tick(fn=lambda _: format_leaderboard_table(df_results=current_dataframe), inputs=leaderboards[5], outputs=leaderboards[5])
105
 
106
  with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
107
  gr.Markdown(
utils.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import pandas as pd
2
  from datasets import load_dataset
3
  import gradio as gr
@@ -12,6 +13,9 @@ def show_output_box(message):
12
 
13
 
14
  def fetch_hf_results():
 
 
 
15
  # Should cache by default if not using force_redownload
16
  df = load_dataset(
17
  RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",
 
1
+ from datetime import datetime, timezone, timedelta
2
  import pandas as pd
3
  from datasets import load_dataset
4
  import gradio as gr
 
13
 
14
 
15
  def fetch_hf_results():
16
+ # Print current time in EST
17
+ EST = timezone(timedelta(hours=-4))
18
+ print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
19
  # Should cache by default if not using force_redownload
20
  df = load_dataset(
21
  RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",