Maharshi Gor committed · Commit 98bfd66 · Parent(s): e8a2e09

HF pipelines, Model submission msg bugfix

Files changed:
- README.md +2 -3
- app.py +8 -11
- shared/workflows +1 -1
- src/components/hf_pipelines.py +177 -45
- src/components/quizbowl/bonus.py +2 -1
- src/components/quizbowl/tossup.py +2 -1
- src/envs.py +30 -24
- src/hf_datasets_utils.py +9 -0
- src/submission/_submit.py +0 -119
- src/submission/submit.py +60 -24
README.md CHANGED
@@ -1,14 +1,13 @@
 ---
-title: QANTA 2025
+title: QANTA 2025 Agent Creator and Submission
 emoji: 🥇
 colorFrom: green
 colorTo: indigo
 sdk: gradio
+sdk_version: 5.31.0
 app_file: app.py
 pinned: true
 license: gpl-3.0
 hf_oauth: true
 short_description: Quizbowl Agent Creator and Submission platform.
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -4,7 +4,6 @@ import sys
 import datasets
 import gradio as gr
 from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
 from loguru import logger
 
 from app_configs import DEFAULT_SELECTIONS, THEME
@@ -23,20 +22,18 @@ from envs import (
     CONTACT_EMAIL,
     DISCORD_URL,
     DOCS_REPO_URL,
-    EVAL_REQUESTS_PATH,
-    EVAL_RESULTS_PATH,
     GITHUB_ISSUES_URL,
-    LEADERBOARD_REFRESH_INTERVAL,
     LEADERBOARD_URL,
+    LOCAL_REQUESTS_PATH,
     LOG_LEVEL,
     PLAYGROUND_DATASET,
-    QUEUE_REPO,
+    QUEUE_SYNC_INTERVAL,
     REGISTRATION_URL,
     REPO_ID,
-    …
+    REQUESTS_REPO,
     SERVER_RESTART_INTERVAL,
 )
-from hf_datasets_utils import download_dataset_snapshot
+from hf_datasets_utils import check_and_create_dataset_repo, download_dataset_snapshot
 from shared.workflows import factory
 from shared.workflows.configs import AVAILABLE_MODELS
 from shared.workflows.llms import llm_cache
@@ -51,8 +48,6 @@ def restart_space():
 logger.remove()
 logger.add(sys.stdout, level=LOG_LEVEL, diagnose=False)
 
-download_dataset_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)
-
 
 def filter_qids(qid: str, packet_ids: list[int]) -> bool:
     packet_id = int(qid.split("-")[-2])
@@ -92,6 +87,8 @@ def presave_pipeline_state(
 
 
 if __name__ == "__main__":
+    check_and_create_dataset_repo(REQUESTS_REPO)
+    download_dataset_snapshot(REQUESTS_REPO, LOCAL_REQUESTS_PATH)
     scheduler = BackgroundScheduler()
     scheduler.add_job(restart_space, "interval", seconds=SERVER_RESTART_INTERVAL)
     scheduler.start()
@@ -145,8 +142,8 @@ if __name__ == "__main__":
             **DEFAULT_SELECTIONS["bonus"], init_workflow=factory.create_simple_qb_bonus_workflow()
         )
         bonus_interface = BonusInterface(demo, browser_state, bonus_ds, AVAILABLE_MODELS, defaults)
-
-
+    with gr.Tab("🤗 HuggingFace Pipelines", elem_id="hf-pipeline-tab", id="hf-pipeline-tab"):
+        hf_pipeline_interface = create_hf_pipeline_submission_interface(demo)
     with gr.Tab("❓ Help", id="help"):
         with gr.Row():
             with gr.Column():
shared/workflows CHANGED
@@ -1 +1 @@
-Subproject commit
+Subproject commit 9e8f88dd705d8a9d0a05bef4018782131606461d
src/components/hf_pipelines.py CHANGED
@@ -1,64 +1,196 @@
 import gradio as gr
+from huggingface_hub import HfApi
+from loguru import logger
 
-from …
+from display.formatting import styled_error
+from submission.submit import submit_hf_pipeline_agent, validate_model_name
+
+
+def error_html(msg: str, model_id) -> str:
+    error_msg = msg.format(
+        model_id=f'<code style="color:#222;background:#f2f2f2;padding:2px 4px;border-radius:4px;">{model_id}</code>'
+    )
+    return (
+        f"<div style='background-color:#ffeaea; color:#b30000; border:1px solid #ffcccc; "
+        f"padding:10px; border-radius:6px; font-size:16px; text-align:left;'>"
+        f"{error_msg}"
+        f"</div>"
+    )
+
+
+def verify_pipeline(model_id: str) -> tuple[bool, str]:
+    """
+    Check if model_id is a public, non-gated model repo on Hugging Face Hub.
+    Returns (success: bool, error_message: str)
+    """
+    try:
+        api = HfApi()
+        # Get model info
+        info = api.model_info(model_id)
+        # Check if model is public and not gated
+        is_public = info.private is False
+        is_gated = getattr(info, "gated", False)
+        if not is_public or is_gated:
+            if not is_public:
+                return False, error_html("Model {model_id} is private. Please make your model public.", model_id)
+            if is_gated:
+                return False, error_html("Model {model_id} is gated. Please use a non-gated model.", model_id)
+        return True, ""
+    except Exception as e:
+        logger.exception(e)
+        return False, error_html(
+            "Could not verify model {model_id}. Please check if the model is public and not gated.", model_id
+        )
+
+
+def attempt_submission(model_id: str, description: str, competition_type: str, profile: gr.OAuthProfile | None):
+    if profile is None:
+        return styled_error("Authentication required. Please log in first to submit your model.")
+
+    if "/" not in model_id:
+        full_model_id = f"{profile.username}/{model_id}"
+    elif model_id.split("/")[0] != profile.username:
+        return error_html(f"Model {{model_id}} is not owned by you (username: {profile.username}).", model_id)
+    else:
+        full_model_id = model_id
+        model_id = model_id.split("/", 1)[1]
+    valid, msg = validate_model_name(model_id)
+    if not valid:
+        return error_html(msg, model_id)
+
+    success, msg = verify_pipeline(full_model_id)
+    if not success:
+        return msg
+    try:
+        return submit_hf_pipeline_agent(model_id, description, competition_type, profile)
+    except Exception as e:
+        return styled_error(f"Error: Could not submit model '{model_id}': {e}")
+
+
+def create_model_submission_panel(app: gr.Blocks, competition_type: str):
+    with gr.Column():
+        model_name_input = gr.Textbox(
+            label="Hugging Face Model ID",
+            placeholder="<yourname/my-qbt-model> OR <my-qbt-model>",
+        )
+        description_input = gr.Textbox(
+            label="Pipeline Description",
+            placeholder="e.g. My QBT model is a simple model that uses a pipeline to predict the answer to a question.",
+        )
+
+        submit_btn = gr.Button("📤 Submit", interactive=False)
+
+        submit_status = gr.HTML(label="Submission Status", visible=False)
+
+        def check_user_login(profile: gr.OAuthProfile | None):
+            if profile is not None:
+                return gr.update(interactive=True, value="📤 Submit")
+            return gr.update(interactive=False, value="🔒 Login to submit")
+
+        gr.on(triggers=app.load, fn=check_user_login, inputs=[], outputs=[submit_btn])
+        submit_btn.click(
+            attempt_submission,
+            inputs=[model_name_input, description_input, gr.State(competition_type)],
+            outputs=[submit_status],
+            concurrency_limit=1,
+        )
+    return model_name_input, description_input, submit_btn, submit_status
 
 
 def create_hf_pipeline_submission_interface(demo: gr.Blocks):
-    …
-    …
     gr.Markdown(
         """
-        …
+        # Submit Your Hugging Face Pipeline Model
+
+        Welcome to the Hugging Face pipeline submission interface for the QANTA 2025 competition!
+        This page allows you to submit your models for both Tossup and Bonus tasks.
+
+        **General Requirements:**
+        - Your model must be a public, non-gated repository on the Hugging Face Hub.
+        - Ensure your model can be loaded using the `pipeline()` function from the `transformers` library.
+        - Adhere to the specified input/output formats for each task.
+
+        For help getting started, check out our [Starter Code](https://github.com/qanta-challenge/qanta25-starter).
+
+        You can also refer to Hugging Face's [custom pipeline creation guide](https://huggingface.co/docs/transformers/en/add_new_pipeline) for more information on how to create a custom pipeline.
+
+        Select the appropriate tab below based on the type of question your model is designed for.
         """
     )
-
-    tossup_model_box = gr.Textbox(
-        label="Hugging Face repo or model ID", placeholder="e.g. yourname/my-qbt-model"
-    )
-    tossup_submit_btn = gr.Button("Evaluate")
-    tossup_output_json = gr.JSON(label="Tossup Metrics")
-    tossup_submit_btn.click(
-        lambda model_id: create_hf_submission(model_id, model_id, "", "tossup"),
-        inputs=tossup_model_box,
-        outputs=[tossup_output_json],
-        concurrency_limit=1,
-    )
-
-    with gr.…
-    gr.Markdown(
-        """
-        …
-        """
-    )
-
-    bonus_model_box = gr.Textbox(
-        label="Hugging Face repo or model ID", placeholder="e.g. yourname/my-qbb-model"
-    )
-    bonus_submit_btn = gr.Button("Evaluate")
-    bonus_output_json = gr.JSON(label="Bonus Metrics")
-    bonus_submit_btn.click(
-        lambda model_id: create_hf_submission(model_id, model_id, "", "bonus"),
-        inputs=bonus_model_box,
-        outputs=[bonus_output_json],
-        concurrency_limit=1,
-    )
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown(
+                """
+                ## 🛎️ QuizBowl Tossup – Submit your model
+
+                Tossup questions are individual questions revealed progressively, where you need to provide an answer, a confidence score, and a decision on whether to buzz.
+
+                **Pipeline Loading:**
+                Your model repository **must** be loadable with:
+                ```python
+                from transformers import pipeline
+                model = pipeline(task="quizbowl-tossup", model="<your-username/your-repo-name>")
+                ```
+
+                **Input:**
+                The pipeline will receive a dictionary with the key `question_text` (string), which contains the question as revealed so far.
+                ```python
+                {
+                    "question_text": "In 1900, this city hosted a world's fair that introduced the public to the first escalator. Its famous tower, designed by Gustave Eiffel, was initially criticized by artists but is now a global icon. Name this European capital."
+                }
+                ```
+
+                **Output:**
+                Similar to our agents, the pipeline **must** return a dictionary with the following keys:
+                ```python
+                {
+                    "answer": <str: concise answer>,
+                    "confidence": <float: confidence score between 0.0 and 1.0>,
+                    "buzz": <bool: True if your model decides to buzz in with the answer, False otherwise>
+                }
+                ```
+                Enter your Hugging Face model repository ID (`<your-username/your-repo-name>`) and a brief description below, then click "Submit".
+                """
+            )
+            create_model_submission_panel(demo, "tossup")
+
+        with gr.Column():
+            gr.Markdown(
+                """
+                ## 🧐 QuizBowl Bonus – Submit your model
+
+                Bonus questions consist of a lead-in paragraph followed by multiple parts. Your model will be called for each part.
+
+                **Pipeline Loading:**
+                Your model repository **must** be loadable with:
+                ```python
+                from transformers import pipeline
+                model = pipeline(task="quizbowl-bonus", model="<your-username/your-repo-name>")
+                ```
+
+                **Input:**
+                The pipeline will receive a dictionary with two keys:
+                - `leadin` (str): The introductory paragraph for the bonus question.
+                - `part` (str): The specific part of the bonus question to answer.
+                ```python
+                {
+                    "leadin": "This author wrote about a young wizard attending a magical school.",
+                    "part": "For 10 points, name this author."
+                }
+                ```
+
+                **Output:**
+                Similar to our agents, the pipeline **must** return a dictionary with the following keys for each part:
+                ```python
+                {
+                    "answer": <str: concise answer to the part>,
+                    "confidence": <float: confidence score between 0.0 and 1.0>,
+                    "explanation": <str: brief explanation (<=30 words) for your answer (useful for human collaboration)>
+                }
+                ```
+                Enter your Hugging Face model repository ID (`<your-username/your-repo-name>`) and a brief description below, then click "Submit".
+                """
+            )
+            create_model_submission_panel(demo, "bonus")
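The tab's documentation above pins the loading contract to `pipeline(task="quizbowl-tossup", ...)`. As a companion to the linked custom-pipeline guide, here is a minimal sketch of what such a model repo's pipeline class could look like. The class name, base model type, generation settings, and the fixed confidence are illustrative assumptions, not code from this commit; the bonus task is analogous, consuming `leadin`/`part` and returning `answer`/`confidence`/`explanation`.

```python
# Minimal illustrative sketch (assumptions, not this commit's code): a custom
# "quizbowl-tossup" pipeline following the transformers add_new_pipeline guide.
from transformers import AutoModelForSeq2SeqLM, Pipeline
from transformers.pipelines import PIPELINE_REGISTRY


class QuizbowlTossupPipeline(Pipeline):
    def _sanitize_parameters(self, **kwargs):
        # No extra runtime parameters in this sketch.
        return {}, {}, {}

    def preprocess(self, inputs):
        # `inputs` is the evaluator's dict: {"question_text": "..."}
        return self.tokenizer(inputs["question_text"], return_tensors="pt")

    def _forward(self, model_inputs):
        return self.model.generate(**model_inputs, max_new_tokens=16)

    def postprocess(self, model_outputs):
        answer = self.tokenizer.decode(model_outputs[0], skip_special_tokens=True)
        confidence = 0.5  # placeholder; a real model would derive this from generation scores
        # Return exactly the keys the tossup contract above requires.
        return {"answer": answer, "confidence": confidence, "buzz": confidence >= 0.8}


PIPELINE_REGISTRY.register_pipeline(
    "quizbowl-tossup",
    pipeline_class=QuizbowlTossupPipeline,
    pt_model=AutoModelForSeq2SeqLM,
)
```

Per the guide, shipping the class file with the repo and registering the task is what makes a plain `pipeline(task="quizbowl-tossup", model=...)` call resolve; loading custom pipeline code from the Hub typically also requires `trust_remote_code=True` on the caller's side.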
src/components/quizbowl/bonus.py CHANGED
@@ -291,7 +291,8 @@ class BonusInterface:
         """Submit the model output."""
         try:
             workflow = self.validator.validate_state_dict(state_dict, playground=False)
-            …
+            msg = submit.submit_workflow_agent(model_name, description, "bonus", workflow, profile)
+            return gr.update(visible=True, value=msg)
         except Exception as e:
             logger.exception(f"Error submitting model: {e.args}")
             return gr.update(visible=True, value=styled_error(f"Error: {str(e)}"))
src/components/quizbowl/tossup.py CHANGED
@@ -332,7 +332,8 @@ class TossupInterface:
         """Submit the model output."""
         try:
             workflow = self.validator.validate_state_dict(state_dict, playground=False)
-            …
+            msg = submit.submit_workflow_agent(model_name, description, "tossup", workflow, profile)
+            return gr.update(visible=True, value=msg)
         except Exception as e:
             logger.exception(f"Error submitting model: {e.args}")
             return gr.update(visible=True, value=styled_error(f"Error: {str(e)}"))
src/envs.py CHANGED
@@ -2,54 +2,60 @@ import os
 
 from huggingface_hub import HfApi
 
-# …
-# ----------------------------------
+# --------------* Secrets *----------------------------------------
 TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
+ENV_NAME = os.getenv("ENV_NAME", "test")  # Use advcal for production, test for testing
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
 COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
 
+# ------------------------ ENV VARS --------------------------------
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
+HF_HOME = os.getenv("HF_HOME", ".hf")
+
 # Change to your org - don't forget to create a results and request dataset, with the correct format!
 OWNER = "qanta-challenge"
 
+# --------------* READ-ONLY REPOS *---------------------------------
 REPO_ID = f"{OWNER}/quizbowl-submission"
-QUEUE_REPO = f"{OWNER}/advcal-requests"
 RESULTS_REPO = f"{OWNER}/advcal-results"
-LLM_CACHE_REPO = f"{OWNER}/advcal-llm-cache"
 USERS_REPO = f"{OWNER}/registered-users"
-EVAL_SPLITS = ["tiny_eval"]
 
-# …
-…
-…
-DOCS_REPO_URL = "https://github.com/qanta-challenge/QANTA25"
-DOCS_URL = DOCS_REPO_URL + "/tree/main"
-GITHUB_ISSUES_URL = DOCS_REPO_URL + "/issues"
-
-CONTACT_EMAIL = "[email protected]"
-DISCORD_URL = "https://discord.gg/ChmDVatJ6Y"
-REGISTRATION_URL = "https://huggingface.co/spaces/qanta-challenge/register"
-LEADERBOARD_URL = "https://huggingface.co/spaces/qanta-challenge/leaderboard"
-EXAMPLES_PATH = "examples"
+# --------------* READ-WRITE REPOS *--------------------------------
+REQUESTS_REPO = f"{OWNER}/{ENV_NAME}-requests"
+LLM_CACHE_REPO = f"{OWNER}/{ENV_NAME}-llm-cache"
+
 
 PLAYGROUND_DATASET = f"{OWNER}/acf-co24"
 
 # ----------------------------------
 
 # If you set up a cache later, just change HF_HOME
-CACHE_PATH = …
-…
+CACHE_PATH = ".cache"
+EXAMPLES_PATH = "examples"
 
 # Local caches
-LLM_CACHE_PATH = os.path.join(…
-USERS_PATH = os.path.join(…
-…
-…
-…
-EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
+LLM_CACHE_PATH = os.path.join(HF_HOME, "llm-cache")
+USERS_PATH = os.path.join(HF_HOME, "registered-users")
+LOCAL_REQUESTS_PATH = os.path.join(HF_HOME, "eval-queue")
+LOCAL_RESULTS_PATH = os.path.join(HF_HOME, "eval-results")
+LITELLM_CACHE_DIR = os.getenv("LITELLM_CACHE_DIR", f"{CACHE_PATH}/litellm-cache")
 
 
+# --------------* IMPORTANT LINKS *--------------------------------
+QANTA_WEBSITE_URL = "https://sites.google.com/view/qanta/home"
+COMPETITION_URL = "https://sites.google.com/view/qanta/2025-competition"
+DOCS_REPO_URL = "https://github.com/qanta-challenge/QANTA25"
+DOCS_URL = DOCS_REPO_URL + "/tree/main"
+GITHUB_ISSUES_URL = DOCS_REPO_URL + "/issues"
+
+CONTACT_EMAIL = "[email protected]"
+DISCORD_URL = "https://discord.gg/ChmDVatJ6Y"
+REGISTRATION_URL = "https://huggingface.co/spaces/qanta-challenge/register"
+LEADERBOARD_URL = "https://huggingface.co/spaces/qanta-challenge/leaderboard"
+
 LLM_CACHE_REFRESH_INTERVAL = 600  # seconds (10 minutes)
+QUEUE_SYNC_INTERVAL = 600  # seconds (10 minutes)
 SERVER_RESTART_INTERVAL = 2 * 24 * 60 * 60  # seconds (2 days)
 LEADERBOARD_REFRESH_INTERVAL = 600  # seconds (10 minutes)
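One consequence of the new `ENV_NAME` switch worth spelling out: the read-write repos are now environment-scoped, so a test Space and the production Space write to different queues while sharing the read-only repos. A small illustration, with repo names derived from the assignments above:

```python
# Illustration only: how ENV_NAME (from the diff above) scopes the read-write
# repos. The default is "test"; production deployments set ENV_NAME=advcal.
import os

OWNER = "qanta-challenge"
ENV_NAME = os.getenv("ENV_NAME", "test")

REQUESTS_REPO = f"{OWNER}/{ENV_NAME}-requests"
LLM_CACHE_REPO = f"{OWNER}/{ENV_NAME}-llm-cache"

print(REQUESTS_REPO)   # qanta-challenge/test-requests by default
print(LLM_CACHE_REPO)  # qanta-challenge/advcal-llm-cache in production
```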
src/hf_datasets_utils.py CHANGED
@@ -4,6 +4,15 @@ from loguru import logger
 api = HfApi()
 
 
+def check_and_create_dataset_repo(repo_id: str):
+    try:
+        api.repo_info(repo_id=repo_id, repo_type="dataset")
+        print(f"{repo_id} exists")
+    except Exception:
+        print(f"Creating {repo_id}")
+        api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True, private=True)
+
+
 def download_dataset_snapshot(repo_id, local_dir):
     try:
         logger.info(f"Downloading dataset snapshot from {repo_id} to {local_dir}")
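The new helper is idempotent (`exist_ok=True` guards the create even if two workers race), which is what lets app.py call it unconditionally at startup. The bootstrap pattern, using names from this commit:

```python
# Startup bootstrap pattern from app.py above: ensure the private
# per-environment requests repo exists, then mirror it locally.
from envs import LOCAL_REQUESTS_PATH, REQUESTS_REPO
from hf_datasets_utils import check_and_create_dataset_repo, download_dataset_snapshot

check_and_create_dataset_repo(REQUESTS_REPO)  # creates a private dataset repo if missing
download_dataset_snapshot(REQUESTS_REPO, LOCAL_REQUESTS_PATH)  # sync to HF_HOME/eval-queue
```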
src/submission/_submit.py DELETED
@@ -1,119 +0,0 @@
-import json
-import os
-from datetime import datetime, timezone
-
-from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
-from src.submission.check_validity import (
-    already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
-)
-
-REQUESTED_MODELS = None
-USERS_TO_SUBMISSION_DATES = None
-
-def add_new_eval(
-    model: str,
-    base_model: str,
-    revision: str,
-    precision: str,
-    weight_type: str,
-    model_type: str,
-):
-    global REQUESTED_MODELS
-    global USERS_TO_SUBMISSION_DATES
-    if not REQUESTED_MODELS:
-        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
-
-    user_name = ""
-    model_path = model
-    if "/" in model:
-        user_name = model.split("/")[0]
-        model_path = model.split("/")[1]
-
-    precision = precision.split(" ")[0]
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    if model_type is None or model_type == "":
-        return styled_error("Please select a model type.")
-
-    # Does the model actually exist?
-    if revision == "":
-        revision = "main"
-
-    # Is the model on the hub?
-    if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not base_model_on_hub:
-            return styled_error(f'Base model "{base_model}" {error}')
-
-    if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not model_on_hub:
-            return styled_error(f'Model "{model}" {error}')
-
-    # Is the model info correctly filled?
-    try:
-        model_info = API.model_info(repo_id=model, revision=revision)
-    except Exception:
-        return styled_error("Could not get your model information. Please fill it up properly.")
-
-    model_size = get_model_size(model_info=model_info, precision=precision)
-
-    # Were the model card and license filled?
-    try:
-        license = model_info.cardData["license"]
-    except Exception:
-        return styled_error("Please select a license for your model")
-
-    modelcard_OK, error_msg = check_model_card(model)
-    if not modelcard_OK:
-        return styled_error(error_msg)
-
-    # Seems good, creating the eval
-    print("Adding new eval")
-
-    eval_entry = {
-        "model": model,
-        "base_model": base_model,
-        "revision": revision,
-        "precision": precision,
-        "weight_type": weight_type,
-        "status": "PENDING",
-        "submitted_time": current_time,
-        "model_type": model_type,
-        "likes": model_info.likes,
-        "params": model_size,
-        "license": license,
-        "private": False,
-    }
-
-    # Check for duplicate submission
-    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
-        return styled_warning("This model has been already submitted.")
-
-    print("Creating eval file")
-    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
-    os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
-
-    with open(out_path, "w") as f:
-        f.write(json.dumps(eval_entry))
-
-    print("Uploading eval file")
-    API.upload_file(
-        path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-queue/")[1],
-        repo_id=QUEUE_REPO,
-        repo_type="dataset",
-        commit_message=f"Add {model} to eval queue",
-    )
-
-    # Remove the local file
-    os.remove(out_path)
-
-    return styled_message(
-        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
-    )
src/submission/submit.py CHANGED
@@ -12,14 +12,14 @@ from loguru import logger
 
 from app_configs import DAILY_SUBMISSION_LIMIT_PER_USER
 from display.formatting import styled_error, styled_message
-from envs import API, …
+from envs import API, EXAMPLES_PATH, LOCAL_REQUESTS_PATH, OWNER, REQUESTS_REPO
 from shared.workflows.structs import TossupWorkflow, Workflow
-from submission.structs import CompetitionType, Submission, SubmissionStatus
+from submission.structs import CompetitionType, Submission, SubmissionStatus, SubmissionType
 
 
 def get_user_submissions(username: str, competition_type: str, pattern: str = None) -> list[Submission]:
     """Get all submissions for a user."""
-    out_dir = f"{…
+    out_dir = f"{LOCAL_REQUESTS_PATH}/{username}"
     submissions = []
     if not os.path.exists(out_dir):
         return submissions
@@ -57,7 +57,7 @@ def get_user_submissions_by_date(
 ) -> dict[str, list[Submission]]:
     """Get all submissions for a user for a given competition type."""
     date_str = datetime(year, month, day).strftime("%Y%m%d")
-    out_dir = f"{…
+    out_dir = f"{LOCAL_REQUESTS_PATH}/{username}"
     if not os.path.exists(out_dir):
         return {}
     submissions = []
@@ -171,11 +171,12 @@ def validate_model_name(model_name: str):
     return True, ""
 
 
-def submit_model(
+def submit_agent(
     model_name: str,
     description: str,
-    workflow: Workflow,
     competition_type: CompetitionType,
+    submission_type: SubmissionType,
+    workflow: Workflow | None,
     profile: gr.OAuthProfile | None,
 ) -> str:
     """
@@ -212,33 +213,49 @@ def submit_model(
         return styled_error(f"Submission Error! Invalid model name '{model_name}'.<br>{error_msg}")
 
     try:
-        …
+        if submission_type == "simple_workflow":
+            submission = create_workflow_submission(
+                username=username,
+                model_name=model_name,
+                description=description,
+                workflow=workflow,
+                competition_type=competition_type,
+            )
+        elif submission_type == "hf_pipeline":
+            submission = create_hf_submission(
+                username=username,
+                model_name=model_name,
+                description=description,
+                competition_type=competition_type,
+            )
+        else:
+            return styled_error(f"Unsupported submission type: {submission_type}")
+
         # Convert to dictionary format
         submission_dict = submission.to_dict()
 
         # Create output directory path
-        …
-        …
+        rel_filepath = f"{username}/{submission.id}.json"
+        filepath = f"{LOCAL_REQUESTS_PATH}/{rel_filepath}"
+
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        with open(filepath, "w") as f:
+            json.dump(submission_dict, f, indent=2)
 
         # Upload to HuggingFace dataset
         API.upload_file(
-            path_or_fileobj=…
-            path_in_repo=…
-            repo_id=…
+            path_or_fileobj=filepath,
+            path_in_repo=rel_filepath,
+            repo_id=REQUESTS_REPO,
             repo_type="dataset",
-            commit_message=f"Add …
+            commit_message=f"Add {competition_type} {submission_type} submission {submission.id}",
         )
 
         return styled_message(
-            f"Successfully submitted …
+            f"Successfully submitted {competition_type} {submission_type} model!<br>"
             f"Submission name: {username}/{model_name}<br>"
-            f"Please wait for up to …
+            f"Please wait for up to one day for the model to show up on the leaderboard."
         )
 
     except Exception as e:
@@ -246,6 +263,27 @@ def submit_agent(
         return styled_error(f"Error submitting model: {str(e)}")
 
 
+def submit_hf_pipeline_agent(
+    model_name: str,
+    description: str,
+    competition_type: CompetitionType,
+    profile: gr.OAuthProfile | None,
+) -> str:
+    return submit_agent(model_name, description, competition_type, "hf_pipeline", workflow=None, profile=profile)
+
+
+def submit_workflow_agent(
+    model_name: str,
+    description: str,
+    competition_type: CompetitionType,
+    workflow: Workflow,
+    profile: gr.OAuthProfile | None,
+) -> str:
+    return submit_agent(
+        model_name, description, competition_type, "simple_workflow", workflow=workflow, profile=profile
+    )
+
+
 def load_demo_example(model_name: str, competition_type: CompetitionType) -> Workflow | TossupWorkflow:
     """Load a demo example submission."""
     examples_dir = f"{EXAMPLES_PATH}/{competition_type}"
@@ -288,12 +326,10 @@ if __name__ == "__main__":
     )
 
     # Submit model
-    result = …
+    result = submit_agent(
         model_name="GPT-4 Tossup",
         description="A simple GPT-4 model for tossup questions",
         workflow=workflow,
        competition_type="tossup",
     )
     print(result)
-
-    # %%
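For orientation, the two wrappers added here are the entry points the UI uses: tossup.py and bonus.py call `submit_workflow_agent`, and the new HF Pipelines tab calls `submit_hf_pipeline_agent`. A hedged usage sketch (signatures from the diff above; `profile` is the `gr.OAuthProfile` Gradio injects for a logged-in user):

```python
# Usage sketch of the new entry point added in this commit.
msg = submit_hf_pipeline_agent(
    model_name="my-qbt-model",
    description="Custom quizbowl-tossup pipeline",
    competition_type="tossup",
    profile=profile,
)
# On success, the record is written to <LOCAL_REQUESTS_PATH>/<username>/<submission_id>.json
# and mirrored to the REQUESTS_REPO dataset, where the evaluator picks it up.
```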