Create app.py
#1 · by cyka-blyat · opened
- .gitattributes +35 -0
- .gitignore +0 -4
- api.py +0 -135
- app.py +0 -191
- competitions.py +0 -35
- requirements.txt +0 -12
- utils.py +0 -505
.gitattributes ADDED

```diff
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
```
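These are the standard Hugging Face LFS rules: any path matching one of the globs is stored through Git LFS rather than in the regular git object store. As a rough, purely illustrative sketch of which filenames the patterns capture (note that `.gitattributes` uses gitignore-style globbing, which Python's `fnmatch` only approximates; the filenames below are hypothetical):

```python
# Illustrative only: approximate a subset of the LFS patterns above with fnmatch.
import fnmatch

lfs_patterns = ["*.bin", "*.safetensors", "*.ckpt", "*tfevents*"]  # subset of the rules
for name in ["model.safetensors", "events.out.tfevents.1700000000", "README.md"]:
    tracked = any(fnmatch.fnmatch(name, p) for p in lfs_patterns)
    print(f"{name} -> {'LFS' if tracked else 'plain git'}")
```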
.gitignore DELETED

```diff
@@ -1,4 +0,0 @@
-.venv
-__pycache__/
-.env
-**.ipynb
```
api.py DELETED

```diff
@@ -1,135 +0,0 @@
-
-import atexit
-import datetime
-
-from flask import Flask, request, jsonify
-from apscheduler.schedulers.background import BackgroundScheduler
-
-import utils
-
-app = Flask(__name__)
-
-# Global variables (saves time on loading data)
-state_vars = None
-reload_timestamp = datetime.datetime.now().strftime('%D %T')
-
-
-def load_data(test=False):
-    """
-    Reload the state variables
-    """
-    global state_vars, reload_timestamp
-    if test:
-        state_vars = utils.test_load_state_vars()
-    else:
-        state_vars = utils.load_state_vars()
-
-    reload_timestamp = datetime.datetime.now().strftime('%D %T')
-
-    print(f'Reloaded data at {reload_timestamp}')
-
-
-def start_scheduler():
-    scheduler = BackgroundScheduler()
-    scheduler.add_job(func=load_data, trigger="interval", seconds=60*30)
-    scheduler.start()
-
-    # Shut down the scheduler when exiting the app
-    atexit.register(lambda: scheduler.shutdown())
-
-
-@app.route('/', methods=['GET'])
-def home():
-    return "Welcome to the Bittensor Pretraining Leaderboard API!"
-
-
-@app.route('/updated', methods=['GET'])
-def updated():
-    return reload_timestamp
-
-
-@app.route('/benchmark', methods=['GET'])
-def benchmark():
-    """
-    Get the benchmarks and the timestamp
-
-    Returns:
-    - benchmarks: List of dicts (from pandas DataFrame)
-    - benchmark_timestamp: String
-    """
-
-    benchmarks = state_vars.get("benchmarks", None)
-    benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
-
-    return jsonify(
-        {
-            "benchmarks": benchmarks.to_dict(orient='records'),
-            "benchmark_timestamp": benchmark_timestamp.strftime('%Y-%m-%d %H:%M:%S')
-        }
-    )
-
-@app.route('/metagraph', methods=['GET'])
-def metagraph():
-    """
-    Get the metagraph data
-    Returns:
-    - metagraph_data: List of dicts (from pandas DataFrame)
-    """
-
-    metagraph = state_vars["metagraph"]
-
-    return jsonify(
-        utils.make_metagraph_dataframe(metagraph).to_dict(orient='records')
-    )
-
-@app.route('/leaderboard', methods=['GET'])
-def leaderboard():
-    """
-    Get the leaderboard data
-    Returns:
-    - leaderboard_data: List of dicts (from pandas DataFrame)
-    """
-
-    model_data = state_vars["model_data"]
-    scores = state_vars["scores"]
-    show_stale = request.args.get('show_stale')
-    return jsonify(
-        utils.leaderboard_data(model_data, scores, show_stale=show_stale)
-    )
-
-
-@app.route('/loss', methods=['GET'])
-def loss():
-    """
-    Get the losses over time
-    Returns:
-    - losses_over_time: List of dicts (from pandas DataFrame)
-    """
-    vali_runs = state_vars["vali_runs"]
-
-    return jsonify(
-        utils.get_losses_over_time(vali_runs).to_dict(orient='records')
-    )
-
-
-@app.route('/validator', methods=['GET'])
-def validator():
-    """
-    Get the validator data
-    Returns:
-    - validator_data: List of dicts (from pandas DataFrame)
-    """
-    model_data = state_vars["model_data"]
-    validator_df = state_vars["validator_df"]
-
-    return jsonify(
-        utils.make_validator_dataframe(validator_df, model_data).to_dict(orient='records')
-    )
-
-
-if __name__ == '__main__':
-
-    load_data()
-    start_scheduler()
-
-    app.run(host='0.0.0.0', port=5000, debug=True)
```
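For reference, a minimal sketch of how this Flask API could have been queried while it was running. It assumes the server is up on localhost port 5000, per the `__main__` block above; the `requests` package is an assumption of this sketch, not a dependency of the Space:

```python
# Hypothetical client for the removed Flask API.
import requests

BASE = "http://localhost:5000"  # matches app.run(host='0.0.0.0', port=5000)

print(requests.get(f"{BASE}/updated").text)  # timestamp of the last data reload
rows = requests.get(f"{BASE}/leaderboard", params={"show_stale": "1"}).json()
for row in rows[:3]:
    print(row)
```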
app.py CHANGED

```diff
@@ -1,191 +0,0 @@
-# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
-
-import datetime
-import os
-
-import gradio as gr
-import matplotlib.pyplot as plt
-from apscheduler.schedulers.background import BackgroundScheduler
-from dotenv import load_dotenv
-from huggingface_hub import HfApi
-
-import competitions
-import utils
-
-FONT = (
-    """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
-)
-TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
-HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receive a steady emission of TAO.</h3>"""
-EVALUATION_HEADER = """<h3 align="center">Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator</h3>"""
-
-HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
-SECONDS_PER_BLOCK = 12
-
-load_dotenv()
-
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-API = HfApi(token=HF_TOKEN)
-
-
-def get_next_update_div(current_block: int, next_update_block: int) -> str:
-    now = datetime.datetime.now()
-    blocks_to_go = next_update_block - current_block
-    next_update_time = now + datetime.timedelta(
-        seconds=blocks_to_go * SECONDS_PER_BLOCK
-    )
-    delta = next_update_time - now
-    return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
-
-
-def get_last_updated_div() -> str:
-    return f"""<div>Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
-
-
-def restart_space():
-    API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
-
-
-def main():
-    # To avoid leaderboard failures, infinitely try until we get all data
-    # needed to populate the dashboard
-
-    state_vars = utils.load_state_vars()
-    model_data = state_vars["model_data"]
-    vali_runs = state_vars["vali_runs"]
-    scores = state_vars["scores"]
-    validator_df = state_vars["validator_df"]
-    benchmarks_df = state_vars["benchmarks_df"]
-    benchmarks_targets = state_vars["benchmarks_targets"]
-
-    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
-    with demo:
-        gr.HTML(FONT)
-        gr.HTML(TITLE)
-        gr.HTML(HEADER)
-
-        gr.Label(
-            label="Emissions",
-            value={
-                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
-                for c in model_data
-                if c.incentive
-            },
-            num_top_classes=10,
-        )
-
-        comp_ids = [2, 3]
-        with gr.Accordion("Competition Results"):
-            gr.HTML(EVALUATION_HEADER)
-            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
-            competition_leaderboards = []
-            for comp_id in comp_ids:
-                details = competitions.COMPETITION_DETAILS[comp_id]
-                with gr.Accordion(f"{details.name} Competition"):
-                    gr.HTML(details.html_description)
-                    competition_leaderboards.append(
-                        gr.components.Dataframe(
-                            value=utils.leaderboard_data(
-                                model_data, scores, comp_id, show_stale.value
-                            ),
-                            headers=[
-                                "Name",
-                                "Win Rate",
-                                "Score",
-                                "Weight",
-                                "UID",
-                                "Block",
-                            ],
-                            datatype=[
-                                "markdown",
-                                "number",
-                                "number",
-                                "number",
-                                "number",
-                                "number",
-                            ],
-                            elem_id=f"comp{comp_id}-table",
-                            interactive=False,
-                            visible=True,
-                        )
-                    )
-            gr.HTML(
-                """
-                <ul><li><b>Name:</b> the 🤗 Hugging Face repo (click to go to the model card)</li>
-                <li><b>Win Rate:</b> % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage</li>
-                <li><b>Score:</b> the combined model score as calculated by the OTF validator (lower is better)</li>
-                <li><b>UID:</b> the Bittensor UID of the miner</li>
-                <li><b>Weight:</b> the bittensor weight set for this model</li>
-                <li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>."""
-            )
-            show_stale.change(
-                lambda stale: [
-                    utils.leaderboard_data(model_data, scores, id, stale)
-                    for id in comp_ids
-                ],
-                inputs=[show_stale],
-                outputs=competition_leaderboards,
-            )
-
-        if benchmarks_df is not None:
-
-            def create_benchmark_plot(benchmark: str, comp_id: int):
-                fig = plt.figure(figsize=(10, 8))
-
-                # Filter to just entries for this competition.
-                df = benchmarks_df[benchmarks_df["competition_id"] == comp_id]
-
-                plt.plot(df["timestamp"], df[benchmark])
-
-                # Adding horizontal dotted lines for various benchmark targets (well-known models)
-                for model, score in benchmarks_targets[benchmark].items():
-                    plt.axhline(y=score, linestyle="--", label=f"{model}")
-                    plt.text(
-                        benchmarks_df["timestamp"].max(),
-                        score,
-                        f"{model}",
-                        va="center",
-                        ha="right",
-                        backgroundcolor="white",
-                    )
-
-                # Adding labels and title
-                plt.ylabel(benchmark.upper())
-                plt.title(f"{benchmark.upper()} Over Time")
-                plt.xticks(rotation=45)
-
-                return fig
-
-            with gr.Accordion("Top Model Benchmarks"):
-                for comp_id in comp_ids:
-                    details = competitions.COMPETITION_DETAILS[comp_id]
-                    with gr.Accordion(f"{details.name} Benchmarks"):
-                        mmlu = create_benchmark_plot("mmlu", comp_id)
-                        mmlu_pro = create_benchmark_plot("mmlu_pro", comp_id)
-                        gr.Plot(mmlu)
-                        gr.Plot(mmlu_pro)
-                gr.HTML(
-                    """<div>Benchmarks computed using <a href='https://github.com/EleutherAI/lm-evaluation-harness'>lm-eval harness</a></div>"""
-                )
-                gr.HTML(
-                    """<ul><li>MMLU: Raw score</li><li>MMLU Pro: Normalized score using <a href='https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization'>this</a> method</li></ul>"""
-                )
-
-        with gr.Accordion("Validator Stats"):
-            gr.components.Dataframe(
-                utils.make_validator_dataframe(validator_df, model_data),
-                interactive=False,
-                visible=True,
-            )
-        gr.HTML(value=get_last_updated_div())
-
-    scheduler = BackgroundScheduler()
-    scheduler.add_job(
-        restart_space, "interval", seconds=60 * 30
-    )  # restart every 15 minutes
-    scheduler.start()
-
-    demo.launch()
-
-
-main()
```
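One piece of arithmetic in the removed file worth making concrete: `get_next_update_div` converts a block delta into wall-clock time using the 12-second Bittensor block interval. A worked example with a hypothetical block count:

```python
# Worked example of the estimate in get_next_update_div (blocks_to_go is hypothetical).
SECONDS_PER_BLOCK = 12
blocks_to_go = 75  # next_update_block - current_block
minutes = blocks_to_go * SECONDS_PER_BLOCK // 60
print(f"Next reward update: {blocks_to_go} blocks (~{minutes} minutes)")  # ~15 minutes
```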
competitions.py DELETED

```diff
@@ -1,35 +0,0 @@
-from dataclasses import dataclass
-import html
-from typing import Dict
-
-
-@dataclass(frozen=True)
-class CompetitionDetails:
-    # The display name of the competition.
-    name: str
-
-    # The HTML description of the competition.
-    html_description: str
-
-
-# A map of competition IDs to HTML descriptions.
-COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
-    1: CompetitionDetails(
-        name="SN9_MODEL",
-        html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18.""",
-    ),
-    2: CompetitionDetails(
-        name="General Knowledge Chat-bot",
-        # TODO: Add link to SN1 dataset details.
-        html_description="""<b>Competition ID 2</b><br/>Produce the best general knowledge chat-bot. Models are evaluated using synthetic MMLU-like dataset from Subnet 1.""",
-    ),
-    3: CompetitionDetails(
-        name="General Knowledge Chat-bot (BYO tokenizer)",
-        html_description="""<b>Competition ID 3</b><br/>Produce the best general knowledge chat-bot. Models bring their own tokenizer and are evaluated using synthetic MMLU-like dataset from Subnet 1.""",
-    )
-}
-
-COMP_NAME_TO_ID = {
-    "B7_MULTI_CHOICE": 2,
-    "INSTRUCT_8B": 3,
-}
```
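A minimal usage sketch of the module being removed, using only the names it defines (this mirrors how app.py and utils.py consumed it, though the exact call sites vary):

```python
# Sketch: looking up competition metadata by wandb competition name.
from competitions import COMPETITION_DETAILS, COMP_NAME_TO_ID

comp_id = COMP_NAME_TO_ID["B7_MULTI_CHOICE"]  # -> 2
details = COMPETITION_DETAILS[comp_id]
print(details.name)              # "General Knowledge Chat-bot"
print(details.html_description)  # HTML rendered in the leaderboard accordion
```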
requirements.txt DELETED

```diff
@@ -1,12 +0,0 @@
-bittensor==7.3.1
-requests
-wandb==0.17.1
-numpy==1.26.4
-python-dotenv
-APScheduler
-huggingface-hub
-gradio
-pandas
-flask
-matplotlib
-
```
utils.py DELETED

```diff
@@ -1,505 +0,0 @@
-import argparse
-import datetime
-import functools
-import json
-import math
-import os
-import time
-import traceback
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple
-
-import bittensor as bt
-import numpy as np
-import pandas as pd
-import wandb
-from bittensor.extrinsics.serving import get_metadata
-from dotenv import load_dotenv
-from wandb.apis.public.history import HistoryScan, SampledHistoryScan
-
-from competitions import COMP_NAME_TO_ID
-
-NETUID = 37
-DELAY_SECS = 3
-RETRIES = 3
-
-load_dotenv()
-
-WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
-SUBTENSOR_ENDPOINT = os.environ.get("SUBTENSOR_ENDPOINT", None)
-VALIDATOR_WANDB_PROJECT = "rusticluftig/finetuning"
-BENCHMARK_WANDB_PROJECT = "rusticluftig/test-benchmarks"
-
-
-@dataclass(frozen=True)
-class ModelData:
-    uid: int
-    hotkey: str
-    competition_id: int
-    namespace: str
-    name: str
-    commit: str
-
-    # Hash of (hash(model) + hotkey)
-    secure_hash: str
-    block: int
-    incentive: float
-    emission: float
-
-    @classmethod
-    def from_compressed_str(
-        cls,
-        uid: int,
-        hotkey: str,
-        cs: str,
-        block: int,
-        incentive: float,
-        emission: float,
-    ):
-        """Returns an instance of this class from a compressed string representation"""
-        tokens = cs.split(":")
-        return ModelData(
-            uid=uid,
-            hotkey=hotkey,
-            namespace=tokens[0],
-            name=tokens[1],
-            commit=tokens[2],
-            secure_hash=tokens[3],
-            competition_id=int(tokens[4]),
-            block=block,
-            incentive=incentive,
-            emission=emission,
-        )
-
-
-def run_with_retries(func, *args, **kwargs):
-    """Runs a provided function with retries in the event of a failure."""
-    for i in range(0, RETRIES):
-        try:
-            return func(*args, **kwargs)
-        except (Exception, RuntimeError):
-            print(f"Failed to run function: {traceback.format_exc()}")
-            if i == RETRIES - 1:
-                raise
-            time.sleep(DELAY_SECS)
-    raise RuntimeError("Should never happen")
-
-
-def get_subtensor_and_metagraph() -> Tuple[bt.subtensor, bt.metagraph]:
-    """Returns a subtensor and metagraph for the finetuning subnet."""
-
-    def _internal() -> Tuple[bt.subtensor, bt.metagraph]:
-        if SUBTENSOR_ENDPOINT:
-            parser = argparse.ArgumentParser()
-            bt.subtensor.add_args(parser)
-            subtensor = bt.subtensor(
-                config=bt.config(
-                    parser=parser,
-                    args=["--subtensor.chain_endpoint", SUBTENSOR_ENDPOINT],
-                )
-            )
-        else:
-            subtensor = bt.subtensor("finney")
-
-        metagraph = subtensor.metagraph(NETUID, lite=False)
-
-        return subtensor, metagraph
-
-    return run_with_retries(_internal)
-
-
-def get_subnet_data(
-    subtensor: bt.subtensor, metagraph: bt.metagraph
-) -> List[ModelData]:
-    result = []
-    for uid in metagraph.uids.tolist():
-        hotkey = metagraph.hotkeys[uid]
-        metadata = None
-        try:
-            metadata = run_with_retries(
-                functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
-            )
-        except:
-            print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")
-
-        if not metadata:
-            continue
-
-        commitment = metadata["info"]["fields"][0]
-        hex_data = commitment[list(commitment.keys())[0]][2:]
-        chain_str = bytes.fromhex(hex_data).decode()
-        block = metadata["block"]
-
-        incentive = np.nan_to_num(metagraph.incentive[uid]).item()
-        emission = (
-            np.nan_to_num(metagraph.emission[uid]).item() * 20
-        )  # convert to daily TAO
-
-        model_data = None
-        try:
-            model_data = ModelData.from_compressed_str(
-                uid, hotkey, chain_str, block, incentive, emission
-            )
-        except:
-            continue
-
-        result.append(model_data)
-    return result
-
-
-def get_wandb_runs(
-    project: str, filters: Dict[str, Any], order: str = "-created_at"
-) -> List:
-    """Get the latest runs from Wandb, retrying infinitely until we get them.
-
-    Args:
-        project (str): The Wandb project to get runs from.
-        filters (Dict[str, Any]): Filters to apply to the runs.
-        order (str): Order to sort the runs by. Defaults to "-created_at" (newest first)
-
-    Returns:
-        List: List of runs matching the provided filters
-    """
-    while True:
-        api = wandb.Api(api_key=WANDB_TOKEN, timeout=100)
-        runs = list(
-            api.runs(
-                project,
-                filters=filters,
-                order=order,
-            )
-        )
-        if len(runs) > 0:
-            return runs
-        # WandDB API is quite unreliable. Wait another minute and try again.
-        print("Failed to get runs from Wandb. Trying again in 60 seconds.")
-        time.sleep(60)
-
-
-def get_scores(
-    uids: List[int],
-    wandb_runs: List,
-) -> Dict[int, Dict[str, Optional[float]]]:
-    """Returns the most recent scores for the provided UIDs.
-
-    Args:
-        uids (List[int]): List of UIDs to get scores for.
-        wandb_runs (List): List of validator runs from Wandb. Requires the runs are provided in descending order.
-    """
-    result = {}
-    previous_timestamp = None
-    seen_competitions = set()
-    # Iterate through the runs until we've processed all the uids.
-    for i, run in enumerate(wandb_runs):
-        if not "original_format_json" in run.summary:
-            continue
-        data = json.loads(run.summary["original_format_json"])
-        all_uid_data = data["uid_data"]
-        timestamp = data["timestamp"]
-        # Make sure runs are indeed in descending time order.
-        assert (
-            previous_timestamp is None or timestamp < previous_timestamp
-        ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
-        previous_timestamp = timestamp
-
-        comp_id = data.get("competition_id", None)
-        for uid in uids:
-            if uid in result:
-                continue
-            if str(uid) in all_uid_data:
-                uid_data = all_uid_data[str(uid)]
-                # Only the most recent run per competition is fresh.
-                is_fresh = comp_id not in seen_competitions
-                result[uid] = {
-                    "avg_loss": uid_data.get("average_loss", None),
-                    "win_rate": uid_data.get("win_rate", None),
-                    "win_total": uid_data.get("win_total", None),
-                    "weight": uid_data.get("weight", None),
-                    "competition_id": uid_data.get("competition_id", None),
-                    "fresh": is_fresh,
-                }
-        seen_competitions.add(comp_id)
-        if len(result) == len(uids):
-            break
-    return result
-
-
-def get_validator_weights(
-    metagraph: bt.metagraph,
-) -> Dict[int, Tuple[float, int, Dict[int, float]]]:
-    """Returns a dictionary of validator UIDs to (vtrust, stake, {uid: weight})."""
-    ret = {}
-    for uid in metagraph.uids.tolist():
-        vtrust = metagraph.validator_trust[uid].item()
-        stake = metagraph.stake[uid].item()
-        if vtrust > 0 and stake > 10_000:
-            ret[uid] = (vtrust, stake, {})
-            for ouid in metagraph.uids.tolist():
-                if ouid == uid:
-                    continue
-                weight = round(metagraph.weights[uid][ouid].item(), 4)
-                if weight > 0:
-                    ret[uid][-1][ouid] = weight
-    return ret
-
-
-def get_losses_over_time(wandb_runs: List, competition_id: int) -> pd.DataFrame:
-    """Returns a dataframe of the best average model loss over time."""
-    timestamps = []
-    losses = []
-
-    for run in wandb_runs:
-        # For each run, check the 10 most recent steps.
-        best_loss = math.inf
-        should_add_datapoint = False
-        min_step = max(0, run.lastHistoryStep - 10)
-        history_scan = SampledHistoryScan(
-            run.client,
-            run,
-            ["original_format_json"],
-            min_step,
-            run.lastHistoryStep,
-            page_size=10,
-        )
-        max_timestamp = None
-        for step in history_scan:
-            data = json.loads(step["original_format_json"])
-            all_uid_data = data["uid_data"]
-            timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
-            if max_timestamp is None:
-                max_timestamp = timestamp
-            max_timestamp = max(max_timestamp, timestamp)
-
-            for _, uid_data in all_uid_data.items():
-                loss = uid_data.get("average_loss", math.inf)
-                c_id = uid_data.get("competition_id", None)
-                if c_id is None or c_id != competition_id:
-                    continue
-
-                # Filter out issue caused by wandb unavailability.
-                if loss < 0.99 and loss < best_loss:
-                    best_loss = loss
-                    should_add_datapoint = True
-        # Now that we've processed the run's most recent steps, check if we should add a datapoint.
-        if should_add_datapoint:
-            timestamps.append(max_timestamp)
-            losses.append(best_loss)
-
-    return pd.DataFrame({"timestamp": timestamps, "losses": losses})
-
-
-def is_floatable(x) -> bool:
-    return (
-        isinstance(x, float) and not math.isnan(x) and not math.isinf(x)
-    ) or isinstance(x, int)
-
-
-def format_score(uid: int, scores, key) -> Optional[float]:
-    if uid in scores:
-        if key in scores[uid]:
-            point = scores[uid][key]
-            if is_floatable(point):
-                return round(scores[uid][key], 4)
-    return None
-
-
-def leaderboard_data(
-    leaderboard: List[ModelData],
-    scores: Dict[int, Dict[str, Optional[float]]],
-    competition_id: int,
-    show_stale: bool,
-) -> List[List[Any]]:
-    """Returns the leaderboard data, based on models data and UID scores."""
-    return [
-        [
-            f"[{c.namespace}/{c.name} ({c.commit[0:8]})](https://huggingface.co/{c.namespace}/{c.name}/commit/{c.commit})",
-            format_score(c.uid, scores, "win_rate"),
-            format_score(c.uid, scores, "avg_loss"),
-            format_score(c.uid, scores, "weight"),
-            c.uid,
-            c.block,
-        ]
-        for c in leaderboard
-        if c.competition_id == competition_id
-        and ((c.uid in scores and scores[c.uid]["fresh"]) or show_stale)
-    ]
-
-
-def get_benchmarks() -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]]]:
-    """Returns the latest benchmarks and the time they were run."""
-    if not BENCHMARK_WANDB_PROJECT:
-        print("No benchmark project set.")
-        return None, None
-    runs = get_wandb_runs(
-        project=BENCHMARK_WANDB_PROJECT, filters=None, order="+created_at"
-    )
-    timestamps, uids, models, comp_ids, mmlu, mmlu_pro = [], [], [], [], [], []
-    for run in runs:
-        uid = run.config.get("uid", None)
-        model = run.config.get("model", None)
-        # Any run without a competition_id was for competition 2.
-        comp_name = run.config.get("competition_id", "B7_MULTI_CHOICE")
-        comp_id = COMP_NAME_TO_ID.get(comp_name, 2)
-        if not uid or not model:
-            continue
-        samples = list(
-            HistoryScan(
-                run.client,
-                run,
-                0,
-                1,
-            )
-        )
-        if not samples:
-            continue
-        sample = samples[0]
-
-        # Make sure we have all the required keys.
-        has_all_keys = True
-        for required_key in ["mmlu.acc,none", "mmlu_pro", "_timestamp"]:
-            if required_key not in sample:
-                has_all_keys = False
-                break
-        if not has_all_keys:
-            continue
-
-        comp_ids.append(comp_id)
-        timestamps.append(datetime.datetime.fromtimestamp(sample["_timestamp"]))
-        mmlu.append(sample["mmlu.acc,none"])
-        mmlu_pro.append(sample["mmlu_pro"])
-        uids.append(uid)
-        models.append(model)
-    return (
-        pd.DataFrame(
-            {
-                "timestamp": timestamps,
-                "uid": uids,
-                "model": models,
-                "competition_id": comp_ids,
-                "mmlu": mmlu,
-                "mmlu_pro": mmlu_pro,
-            }
-        ),
-        {
-            "mmlu": {
-                "Llama-3.1-8B-Instruct": 0.681,
-                "Mistral-7B-Instruct-v0.3": 0.597,
-                "gemma-2-9b-it": 0.719,
-            },
-            "mmlu_pro": {
-                "Llama-3.1-8B-Instruct": 30.68,
-                "Mistral-7B-Instruct-v0.3": 23.06,
-                "gemma-2-9b-it": 31.95,
-            },
-        },
-    )
-
-
-def make_validator_dataframe(
-    validator_df: pd.DataFrame, model_data: ModelData
-) -> pd.DataFrame:
-
-    values = [
-        [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
-        + [validator_df[uid][-1].get(c.uid) for c in model_data if c.incentive]
-        for uid, _ in sorted(
-            zip(
-                validator_df.keys(),
-                [validator_df[x][1] for x in validator_df.keys()],
-            ),
-            key=lambda x: x[1],
-            reverse=True,
-        )
-    ]
-    dtypes = {"UID": int, "Stake (τ)": float, "V-Trust": float}
-    dtypes.update(
-        {
-            f"{c.namespace}/{c.name} ({c.commit[0:8]})": float
-            for c in model_data
-            if c.incentive
-        }
-    )
-    return pd.DataFrame(values, columns=dtypes.keys()).astype(dtypes)
-
-
-def make_metagraph_dataframe(metagraph: bt.metagraph, weights=False) -> pd.DataFrame:
-
-    cols = [
-        "stake",
-        "emission",
-        "trust",
-        "validator_trust",
-        "dividends",
-        "incentive",
-        "R",
-        "consensus",
-        "validator_permit",
-    ]
-
-    frame = pd.DataFrame({k: getattr(metagraph, k) for k in cols})
-    frame["block"] = metagraph.block.item()
-    frame["netuid"] = NETUID
-    frame["uid"] = range(len(frame))
-    frame["hotkey"] = [axon.hotkey for axon in metagraph.axons]
-    frame["coldkey"] = [axon.coldkey for axon in metagraph.axons]
-    if weights and metagraph.W is not None:
-        # convert NxN tensor to a list of lists so it fits into the dataframe
-        frame["weights"] = [w.tolist() for w in metagraph.W]
-
-    return frame
-
-
-def load_state_vars() -> dict[Any]:
-    while True:
-        try:
-            subtensor, metagraph = get_subtensor_and_metagraph()
-
-            print(f"Loaded subtensor and metagraph: {metagraph}")
-
-            model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
-            model_data.sort(key=lambda x: x.incentive, reverse=True)
-            print(f"Loaded {len(model_data)} models")
-
-            vali_runs = get_wandb_runs(
-                project=VALIDATOR_WANDB_PROJECT,
-                filters={
-                    "$and": [{"config.type": "validator"}],
-                    "$or": [{"config.uid": 28}, {"config.uid": 16}],
-                },
-            )
-            print(f"Loaded {len(vali_runs)} validator runs")
-
-            scores = get_scores([x.uid for x in model_data], vali_runs)
-            print(f"Loaded {len(scores)} scores")
-
-            validator_df = get_validator_weights(metagraph)
-            weight_keys = set()
-            for uid, stats in validator_df.items():
-                weight_keys.update(stats[-1].keys())
-            print("Loaded validator weights")
-
-            # Compute loss over time for all competitions.
-            # losses_2 = get_losses_over_time(vali_runs, 2)
-            # print("Loaded losses over time for comp 2")
-
-            benchmarks_df, benchmarks_targets = get_benchmarks()
-            print("Loaded benchmarks")
-            break
-
-        except KeyboardInterrupt:
-            print("Exiting...")
-            break
-
-        except Exception as e:
-            print(f"Failed to get data: {traceback.format_exc()}")
-            time.sleep(30)
-
-    return {
-        "metagraph": metagraph,
-        "model_data": model_data,
-        "vali_runs": vali_runs,
-        "scores": scores,
-        "validator_df": validator_df,
-        "benchmarks_df": benchmarks_df,
-        "benchmarks_targets": benchmarks_targets,
-    }
```
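To make the on-chain commitment format concrete: `ModelData.from_compressed_str` parses a colon-separated string of namespace, name, commit, secure hash, and competition ID. A sketch with invented values (every field below is hypothetical):

```python
# Hypothetical commitment string; all field values here are made up.
cs = "my-org:my-model:0123abcd4567ef89:deadbeefcafe:2"
md = ModelData.from_compressed_str(
    uid=42,
    hotkey="5F...hotkey",  # placeholder ss58 address
    cs=cs,
    block=3_500_000,
    incentive=0.1,
    emission=2.0,
)
print(md.namespace, md.name, md.competition_id)  # my-org my-model 2
```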