File size: 7,162 Bytes
f10a835
 
193db9d
 
973519b
 
55d797c
193db9d
55d797c
 
0bab47c
193db9d
 
f10a835
55d797c
3b39b49
9756440
55d797c
193db9d
 
633b045
973519b
 
 
55d797c
973519b
 
 
 
55d797c
973519b
193db9d
0bab47c
193db9d
973519b
 
 
 
 
55d797c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193db9d
55d797c
 
 
 
 
193db9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55d797c
 
 
 
 
 
f10a835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22e8b31
 
55d797c
22e8b31
 
55d797c
 
193db9d
 
22e8b31
55d797c
 
193db9d
 
22e8b31
f10a835
 
 
 
 
 
22e8b31
f10a835
 
 
 
 
 
 
 
 
 
 
 
55d797c
 
f10a835
 
 
 
55d797c
f10a835
 
 
 
55d797c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9756440
 
 
 
 
 
193db9d
f10a835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import json

import datasets
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from loguru import logger

import populate
from about import LEADERBOARD_INTRODUCTION_TEXT, LEADERBOARD_TITLE
from app_configs import DEFAULT_SELECTIONS, THEME
from components.quizbowl.bonus import BonusInterface
from components.quizbowl.tossup import TossupInterface
from components.typed_dicts import PipelineInterfaceDefaults, TossupInterfaceDefaults
from display.css_html_js import fonts_header, js_head, leaderboard_css
from display.custom_css import css_bonus, css_pipeline, css_tossup
from display.guide import BUILDING_MARKDOWN, GUIDE_MARKDOWN, QUICKSTART_MARKDOWN
from display.utils import AutoEvalColumn, fields

# Constants
from envs import (
    API,
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    LEADERBOARD_REFRESH_INTERVAL,
    PLAYGROUND_DATASET_NAMES,
    QUEUE_REPO,
    REPO_ID,
    RESULTS_REPO,
    SERVER_REFRESH_INTERVAL,
)
from workflows import factory
from workflows.configs import AVAILABLE_MODELS


def restart_space():
    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)


def download_dataset_snapshot(repo_id, local_dir):
    """Download a snapshot of the dataset repo ``repo_id`` into ``local_dir``.

    Any exception raised while logging or downloading is logged as an error
    and followed by a call to ``restart_space()``; nothing is re-raised, so
    the caller always gets ``None`` back.
    """
    # Keyword arguments forwarded verbatim to ``snapshot_download``.
    download_kwargs = {
        "repo_id": repo_id,
        "local_dir": local_dir,
        "repo_type": "dataset",
        "tqdm_class": None,
    }
    try:
        logger.info(f"Downloading dataset snapshot from {repo_id} to {local_dir}")
        snapshot_download(**download_kwargs)
    except Exception as err:
        failure = f"Error downloading dataset snapshot from {repo_id} to {local_dir}: {err}. Restarting space."
        logger.error(failure)
        restart_space()


# Runs when the module is loaded: download the QUEUE_REPO dataset snapshot
# into EVAL_REQUESTS_PATH before any UI is built.
download_dataset_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)


def fetch_leaderboard_df():
    """Refresh the local results snapshot and build the leaderboard table.

    Downloads ``RESULTS_REPO`` into ``EVAL_RESULTS_PATH`` and returns whatever
    ``populate.get_leaderboard_df`` produces for that directory.
    """
    logger.info("Leaderboard fetched...")
    results_dir = EVAL_RESULTS_PATH
    download_dataset_snapshot(RESULTS_REPO, results_dir)
    leaderboard = populate.get_leaderboard_df(results_dir)
    return leaderboard


def load_dataset(mode: str):
    """Load the filtered ``eval`` split of the playground dataset for ``mode``.

    Args:
        mode: Either ``"tossup"`` or ``"bonus"``; used as the key into
            ``PLAYGROUND_DATASET_NAMES``.

    Returns:
        The dataset restricted to rows whose ``qid``, split on ``"-"``, has
        ``"1"`` as its third field and an integer fourth field of at most 10.

    Raises:
        ValueError: If ``mode`` is neither ``"tossup"`` nor ``"bonus"``.
    """
    # Reject unknown modes before touching the dataset library.
    if mode != "tossup" and mode != "bonus":
        raise ValueError(f"Invalid mode: {mode}")

    def keep_row(row):
        # The fourth field is only parsed when the third field matches,
        # mirroring the short-circuit of a chained ``and``.
        qid_fields = row["qid"].split("-")
        if qid_fields[2] != "1":
            return False
        return int(qid_fields[3]) <= 10

    eval_split = datasets.load_dataset(PLAYGROUND_DATASET_NAMES[mode], split="eval")
    return eval_split.filter(keep_row)


def get_default_tab_id(request: gr.Request):
    """Return a Gradio update selecting the tab named by the ``tab`` query parameter.

    Falls back to ``"tossup"`` when the request carries no ``tab`` parameter.
    """
    logger.info(f"Request: {request}")
    query = request.query_params
    selected_tab = query.get("tab", "tossup")
    return gr.update(selected=selected_tab)


def presave_pipeline_state(
    login_btn,
    browser_state: dict,
    tossup_pipeline_state: dict,
    tossup_output_state: dict,
    bonus_pipeline_state: dict,
    bonus_output_state: dict,
):
    """Copy the current tossup/bonus states into ``browser_state``.

    ``browser_state`` is mutated in place: for each of ``"tossup"`` and
    ``"bonus"`` the nested dict is created if missing, then its
    ``"pipeline_state"`` and ``"output_state"`` entries are overwritten.

    Returns:
        The unchanged ``login_btn`` value and the updated ``browser_state``.
    """
    snapshots = (
        ("tossup", tossup_pipeline_state, tossup_output_state),
        ("bonus", bonus_pipeline_state, bonus_output_state),
    )
    for mode, pipeline_state, output_state in snapshots:
        browser_state.setdefault(mode, {})
        slot = browser_state[mode]
        slot["pipeline_state"] = pipeline_state
        slot["output_state"] = output_state

    # The state is serialized eagerly, so it must be JSON-encodable.
    state_dump = json.dumps(browser_state, indent=4)
    logger.debug(f"Pipeline state before login. Login button: {login_btn}, browser state: {state_dump}")
    return login_btn, browser_state


if __name__ == "__main__":
    # Restart the Space on a fixed interval (SERVER_REFRESH_INTERVAL seconds)
    # from a background scheduler thread.
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=SERVER_REFRESH_INTERVAL)
    scheduler.start()

    # Combine the stylesheet and <head> fragments, then load both playground
    # datasets once up front so the two interfaces can share them.
    css = css_pipeline + css_tossup + css_bonus + leaderboard_css
    head = fonts_header + js_head
    tossup_ds = load_dataset("tossup")
    bonus_ds = load_dataset("bonus")
    with gr.Blocks(
        css=css,
        head=head,
        theme=THEME,
        title="Quizbowl Bot",
    ) as demo:
        # Browser-side state with one slot per mode; it is filled by
        # presave_pipeline_state when the login button is clicked.
        browser_state = gr.BrowserState(
            {
                "tossup": {"pipeline_state": None, "output_state": None},
                "bonus": {"pipeline_state": None, "output_state": None},
            }
        )
        with gr.Row():
            with gr.Column(scale=5):
                gr.Markdown(
                    "## Welcome to Quizbowl Arena! \n### Create, play around, and submit your quizbowl agents.",
                    elem_classes="welcome-text",
                )
            login_btn = gr.LoginButton(scale=1)
        gr.Markdown(
            "**First time here?** Check out the [❓ Help](#help) tab for a quick introduction and the "
            "[walkthrough documentation](https://github.com/stanford-crfm/quizbowl-arena/blob/main/docs/walkthrough.md) "
            "for detailed examples and tutorials on how to create and compete with your own QuizBowl agents.",
            elem_classes="help-text",
        )
        with gr.Tabs() as gtab:
            # Tab labels use real emoji; the previous labels were mojibake
            # (UTF-8 emoji bytes decoded with a legacy code page).
            with gr.Tab("🛎️ Tossup Agents", id="tossup"):
                defaults = TossupInterfaceDefaults(
                    **DEFAULT_SELECTIONS["tossup"], init_workflow=factory.create_simple_qb_tossup_workflow()
                )
                tossup_interface = TossupInterface(demo, browser_state, tossup_ds, AVAILABLE_MODELS, defaults)
            with gr.Tab("🙋🏻‍♂️ Bonus Round Agents", id="bonus"):
                defaults = PipelineInterfaceDefaults(
                    **DEFAULT_SELECTIONS["bonus"], init_workflow=factory.create_simple_qb_bonus_workflow()
                )
                bonus_interface = BonusInterface(demo, browser_state, bonus_ds, AVAILABLE_MODELS, defaults)
            with gr.Tab("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id="leaderboard"):
                # The table is populated by fetch_leaderboard_df, re-run on
                # every tick of the timer and when the refresh button is clicked.
                leaderboard_timer = gr.Timer(LEADERBOARD_REFRESH_INTERVAL)
                gr.Markdown("<a id='leaderboard' href='#leaderboard'>QANTA Leaderboard</a>")
                gr.Markdown(LEADERBOARD_INTRODUCTION_TEXT)
                refresh_btn = gr.Button("🔄 Refresh")
                leaderboard_table = gr.Dataframe(
                    value=fetch_leaderboard_df,
                    every=leaderboard_timer,
                    headers=[c.name for c in fields(AutoEvalColumn)],
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    elem_id="leaderboard-table",
                    interactive=False,
                    visible=True,
                )
                refresh_btn.click(fn=fetch_leaderboard_df, inputs=[], outputs=leaderboard_table)
            with gr.Tab("❓ Help", id="help"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown(QUICKSTART_MARKDOWN)
                    with gr.Column():
                        gr.Markdown(BUILDING_MARKDOWN)

        # Event Listeners

        # On login click, copy both interfaces' pipeline/output states into
        # browser_state before the click is processed further.
        login_btn.click(
            fn=presave_pipeline_state,
            inputs=[
                login_btn,
                browser_state,
                tossup_interface.pipeline_state,
                tossup_interface.output_state,
                bonus_interface.pipeline_state,
                bonus_interface.output_state,
            ],
            outputs=[login_btn, browser_state],
        )

        # Enable the event queue with a default per-event concurrency limit
        # of 40, then start serving.
        demo.queue(default_concurrency_limit=40).launch()