File size: 5,006 Bytes
193db9d
 
973519b
 
55d797c
193db9d
55d797c
 
0bab47c
193db9d
 
55d797c
3b39b49
9756440
55d797c
193db9d
 
633b045
973519b
 
 
55d797c
973519b
 
 
 
55d797c
973519b
193db9d
0bab47c
193db9d
973519b
 
 
 
 
55d797c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193db9d
55d797c
 
 
 
 
193db9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55d797c
 
 
 
 
 
22e8b31
 
55d797c
22e8b31
 
55d797c
 
193db9d
 
22e8b31
55d797c
 
193db9d
 
22e8b31
 
 
55d797c
 
193db9d
0bab47c
193db9d
22e8b31
55d797c
193db9d
0bab47c
193db9d
22e8b31
55d797c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9756440
 
 
 
 
 
193db9d
22e8b31
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import datasets
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from loguru import logger

import populate
from about import LEADERBOARD_INTRODUCTION_TEXT, LEADERBOARD_TITLE
from app_configs import DEFAULT_SELECTIONS, THEME
from components.quizbowl.bonus import BonusInterface
from components.quizbowl.tossup import TossupInterface
from display.css_html_js import fonts_header, js_head, leaderboard_css
from display.custom_css import css_bonus, css_pipeline, css_tossup
from display.guide import BUILDING_MARKDOWN, GUIDE_MARKDOWN, QUICKSTART_MARKDOWN
from display.utils import AutoEvalColumn, fields

# Constants
from envs import (
    API,
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    LEADERBOARD_REFRESH_INTERVAL,
    PLAYGROUND_DATASET_NAMES,
    QUEUE_REPO,
    REPO_ID,
    RESULTS_REPO,
    SERVER_REFRESH_INTERVAL,
)
from workflows import factory
from workflows.configs import AVAILABLE_MODELS


def restart_space() -> None:
    """Ask ``API`` to restart the space identified by ``REPO_ID``."""
    API.restart_space(repo_id=REPO_ID)


def download_dataset_snapshot(repo_id, local_dir):
    """Download a dataset repo snapshot into ``local_dir``.

    Any exception raised while logging or downloading is caught, reported
    through ``logger.error``, and followed by a call to ``restart_space()``;
    the exception is not re-raised.

    Args:
        repo_id: Identifier of the dataset repository to download.
        local_dir: Directory that receives the snapshot files.
    """
    try:
        logger.info(f"Downloading dataset snapshot from {repo_id} to {local_dir}")
        snapshot_download(repo_id=repo_id, local_dir=local_dir, repo_type="dataset", tqdm_class=None)
    except Exception as err:
        # Best effort: report the failure and request a restart instead of propagating.
        failure_message = (
            f"Error downloading dataset snapshot from {repo_id} to {local_dir}: {err}. Restarting space."
        )
        logger.error(failure_message)
        restart_space()


# Runs whenever this module is executed or imported: pull the QUEUE_REPO
# dataset snapshot into EVAL_REQUESTS_PATH before anything else is set up.
download_dataset_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)


def fetch_leaderboard_df():
    """Refresh the local results snapshot and build the leaderboard from it.

    Returns:
        Whatever ``populate.get_leaderboard_df`` produces for
        ``EVAL_RESULTS_PATH``.
    """
    logger.info("Leaderboard fetched...")
    # Re-download first so the table is built from the current RESULTS_REPO contents.
    download_dataset_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
    leaderboard_df = populate.get_leaderboard_df(EVAL_RESULTS_PATH)
    return leaderboard_df


def load_dataset(mode: str):
    """Load the ``eval`` split of the playground dataset for ``mode`` and trim it.

    Only rows whose ``qid`` (split on ``"-"``) has ``"1"`` as its third field
    and an integer fourth field of at most 10 are kept.

    Args:
        mode: Either ``"tossup"`` or ``"bonus"``; used as the key into
            ``PLAYGROUND_DATASET_NAMES``.

    Returns:
        The filtered dataset.

    Raises:
        ValueError: If ``mode`` is neither ``"tossup"`` nor ``"bonus"``.
    """
    # Guard clause: reject unknown modes before touching any dataset.
    if mode not in ("tossup", "bonus"):
        raise ValueError(f"Invalid mode: {mode}")

    def _in_playground_subset(example):
        qid_fields = example["qid"].split("-")
        return qid_fields[2] == "1" and int(qid_fields[3]) <= 10

    eval_split = datasets.load_dataset(PLAYGROUND_DATASET_NAMES[mode], split="eval")
    return eval_split.filter(_in_playground_subset)


def get_default_tab_id(request: gr.Request):
    """Build an update that selects the tab named by the ``tab`` query parameter.

    Args:
        request: Incoming request; its ``query_params`` mapping is read.

    Returns:
        ``gr.update(selected=...)`` carrying the requested tab id, or
        ``"tossup"`` when the ``tab`` parameter is absent.
    """
    logger.info(f"Request: {request}")
    query = request.query_params
    requested_tab = query.get("tab", "tossup")
    return gr.update(selected=requested_tab)


if __name__ == "__main__":
    # Periodically restart the space in the background, every
    # SERVER_REFRESH_INTERVAL seconds.
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=SERVER_REFRESH_INTERVAL)
    scheduler.start()

    # Page-wide styling and <head> content assembled from the display modules.
    css = css_pipeline + css_tossup + css_bonus + leaderboard_css
    head = fonts_header + js_head

    # Question sets shown in the two agent tabs.
    tossup_ds = load_dataset("tossup")
    bonus_ds = load_dataset("bonus")

    with gr.Blocks(
        css=css,
        head=head,
        theme=THEME,
        title="Quizbowl Bot",
    ) as demo:
        with gr.Row():
            gr.Markdown("## Welcome to Quizbowl Bot! This is a tool for creating and testing quizbowl agents.")
        # NOTE(review): `gtab` is not referenced again in this block, and
        # `get_default_tab_id` is never wired to it -- confirm whether
        # query-parameter tab selection is meant to be active.
        with gr.Tabs() as gtab:
            # Tab labels previously contained mojibake (UTF-8 emoji decoded with a
            # single-byte code page); the intended emoji are restored below.
            with gr.Tab("🛎️ Tossup Agents", id="tossup"):
                # Per-tab defaults plus a freshly created starter workflow.
                defaults = DEFAULT_SELECTIONS["tossup"] | {
                    "init_workflow": factory.create_simple_qb_tossup_workflow(),
                }
                tossup_interface = TossupInterface(demo, tossup_ds, AVAILABLE_MODELS, defaults)
            with gr.Tab("🙋🏻‍♂️ Bonus Round Agents", id="bonus"):
                defaults = DEFAULT_SELECTIONS["bonus"] | {
                    "init_workflow": factory.create_simple_qb_bonus_workflow(),
                }
                bonus_interface = BonusInterface(demo, bonus_ds, AVAILABLE_MODELS, defaults)
            with gr.Tab("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id="leaderboard"):
                # The timer drives the periodic re-evaluation of the table's value function.
                leaderboard_timer = gr.Timer(LEADERBOARD_REFRESH_INTERVAL)
                gr.Markdown("<a id='leaderboard' href='#leaderboard'>QANTA Leaderboard</a>")
                gr.Markdown(LEADERBOARD_INTRODUCTION_TEXT)
                refresh_btn = gr.Button("🔄 Refresh")
                leaderboard_table = gr.Dataframe(
                    value=fetch_leaderboard_df,
                    every=leaderboard_timer,
                    headers=[c.name for c in fields(AutoEvalColumn)],
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    elem_id="leaderboard-table",
                    interactive=False,
                    visible=True,
                )
                # Manual refresh uses the same fetch function as the timer.
                refresh_btn.click(fn=fetch_leaderboard_df, inputs=[], outputs=leaderboard_table)
            with gr.Tab("❓ Help", id="help"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown(QUICKSTART_MARKDOWN)
                    with gr.Column():
                        gr.Markdown(BUILDING_MARKDOWN)

    demo.queue(default_concurrency_limit=40).launch()