Files changed (7)
  1. .gitattributes +35 -0
  2. .gitignore +0 -4
  3. api.py +0 -135
  4. app.py +0 -191
  5. competitions.py +0 -35
  6. requirements.txt +0 -12
  7. utils.py +0 -505
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,4 +0,0 @@
- .venv
- __pycache__/
- .env
- **.ipynb
api.py DELETED
@@ -1,135 +0,0 @@
-
- import atexit
- import datetime
-
- from flask import Flask, request, jsonify
- from apscheduler.schedulers.background import BackgroundScheduler
-
- import utils
-
- app = Flask(__name__)
-
- # Global variables (saves time on loading data)
- state_vars = None
- reload_timestamp = datetime.datetime.now().strftime('%D %T')
-
-
- def load_data(test=False):
-     """
-     Reload the state variables
-     """
-     global state_vars, reload_timestamp
-     if test:
-         state_vars = utils.test_load_state_vars()
-     else:
-         state_vars = utils.load_state_vars()
-
-     reload_timestamp = datetime.datetime.now().strftime('%D %T')
-
-     print(f'Reloaded data at {reload_timestamp}')
-
-
- def start_scheduler():
-     scheduler = BackgroundScheduler()
-     scheduler.add_job(func=load_data, trigger="interval", seconds=60*30)
-     scheduler.start()
-
-     # Shut down the scheduler when exiting the app
-     atexit.register(lambda: scheduler.shutdown())
-
-
- @app.route('/', methods=['GET'])
- def home():
-     return "Welcome to the Bittensor Pretraining Leaderboard API!"
-
-
- @app.route('/updated', methods=['GET'])
- def updated():
-     return reload_timestamp
-
-
- @app.route('/benchmark', methods=['GET'])
- def benchmark():
-     """
-     Get the benchmarks and the timestamp
-
-     Returns:
-     - benchmarks: List of dicts (from pandas DataFrame)
-     - benchmark_timestamp: String
-     """
-
-     benchmarks = state_vars.get("benchmarks", None)
-     benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
-
-     return jsonify(
-         {
-             "benchmarks": benchmarks.to_dict(orient='records'),
-             "benchmark_timestamp": benchmark_timestamp.strftime('%Y-%m-%d %H:%M:%S')
-         }
-     )
-
- @app.route('/metagraph', methods=['GET'])
- def metagraph():
-     """
-     Get the metagraph data
-     Returns:
-     - metagraph_data: List of dicts (from pandas DataFrame)
-     """
-
-     metagraph = state_vars["metagraph"]
-
-     return jsonify(
-         utils.make_metagraph_dataframe(metagraph).to_dict(orient='records')
-     )
-
- @app.route('/leaderboard', methods=['GET'])
- def leaderboard():
-     """
-     Get the leaderboard data
-     Returns:
-     - leaderboard_data: List of dicts (from pandas DataFrame)
-     """
-
-     model_data = state_vars["model_data"]
-     scores = state_vars["scores"]
-     show_stale = request.args.get('show_stale')
-     return jsonify(
-         utils.leaderboard_data(model_data, scores, show_stale=show_stale)
-     )
-
-
- @app.route('/loss', methods=['GET'])
- def loss():
-     """
-     Get the losses over time
-     Returns:
-     - losses_over_time: List of dicts (from pandas DataFrame)
-     """
-     vali_runs = state_vars["vali_runs"]
-
-     return jsonify(
-         utils.get_losses_over_time(vali_runs).to_dict(orient='records')
-     )
-
-
- @app.route('/validator', methods=['GET'])
- def validator():
-     """
-     Get the validator data
-     Returns:
-     - validator_data: List of dicts (from pandas DataFrame)
-     """
-     model_data = state_vars["model_data"]
-     validator_df = state_vars["validator_df"]
-
-     return jsonify(
-         utils.make_validator_dataframe(validator_df, model_data).to_dict(orient='records')
-     )
-
-
- if __name__ == '__main__':
-
-     load_data()
-     start_scheduler()
-
-     app.run(host='0.0.0.0', port=5000, debug=True)
app.py CHANGED
@@ -1,191 +0,0 @@
- # Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
-
- import datetime
- import os
-
- import gradio as gr
- import matplotlib.pyplot as plt
- from apscheduler.schedulers.background import BackgroundScheduler
- from dotenv import load_dotenv
- from huggingface_hub import HfApi
-
- import competitions
- import utils
-
- FONT = (
-     """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
- )
- TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
- HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receive a steady emission of TAO.</h3>"""
- EVALUATION_HEADER = """<h3 align="center">Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator</h3>"""
-
- HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
- SECONDS_PER_BLOCK = 12
-
- load_dotenv()
-
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
- API = HfApi(token=HF_TOKEN)
-
-
- def get_next_update_div(current_block: int, next_update_block: int) -> str:
-     now = datetime.datetime.now()
-     blocks_to_go = next_update_block - current_block
-     next_update_time = now + datetime.timedelta(
-         seconds=blocks_to_go * SECONDS_PER_BLOCK
-     )
-     delta = next_update_time - now
-     return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
-
-
- def get_last_updated_div() -> str:
-     return f"""<div>Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
-
-
- def restart_space():
-     API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
-
-
- def main():
-     # To avoid leaderboard failures, infinitely try until we get all data
-     # needed to populate the dashboard
-
-     state_vars = utils.load_state_vars()
-     model_data = state_vars["model_data"]
-     vali_runs = state_vars["vali_runs"]
-     scores = state_vars["scores"]
-     validator_df = state_vars["validator_df"]
-     benchmarks_df = state_vars["benchmarks_df"]
-     benchmarks_targets = state_vars["benchmarks_targets"]
-
-     demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
-     with demo:
-         gr.HTML(FONT)
-         gr.HTML(TITLE)
-         gr.HTML(HEADER)
-
-         gr.Label(
-             label="Emissions",
-             value={
-                 f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
-                 for c in model_data
-                 if c.incentive
-             },
-             num_top_classes=10,
-         )
-
-         comp_ids = [2, 3]
-         with gr.Accordion("Competition Results"):
-             gr.HTML(EVALUATION_HEADER)
-             show_stale = gr.Checkbox(label="Show Stale", interactive=True)
-             competition_leaderboards = []
-             for comp_id in comp_ids:
-                 details = competitions.COMPETITION_DETAILS[comp_id]
-                 with gr.Accordion(f"{details.name} Competition"):
-                     gr.HTML(details.html_description)
-                     competition_leaderboards.append(
-                         gr.components.Dataframe(
-                             value=utils.leaderboard_data(
-                                 model_data, scores, comp_id, show_stale.value
-                             ),
-                             headers=[
-                                 "Name",
-                                 "Win Rate",
-                                 "Score",
-                                 "Weight",
-                                 "UID",
-                                 "Block",
-                             ],
-                             datatype=[
-                                 "markdown",
-                                 "number",
-                                 "number",
-                                 "number",
-                                 "number",
-                                 "number",
-                             ],
-                             elem_id=f"comp{comp_id}-table",
-                             interactive=False,
-                             visible=True,
-                         )
-                     )
-             gr.HTML(
-                 """
-                 <ul><li><b>Name:</b> the 🤗 Hugging Face repo (click to go to the model card)</li>
-                 <li><b>Win Rate:</b> % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage</li>
-                 <li><b>Score:</b> the combined model score as calculated by the OTF validator (lower is better)</li>
-                 <li><b>UID:</b> the Bittensor UID of the miner</li>
-                 <li><b>Weight:</b> the bittensor weight set for this model</li>
-                 <li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>."""
-             )
-             show_stale.change(
-                 lambda stale: [
-                     utils.leaderboard_data(model_data, scores, id, stale)
-                     for id in comp_ids
-                 ],
-                 inputs=[show_stale],
-                 outputs=competition_leaderboards,
-             )
-
-         if benchmarks_df is not None:
-
-             def create_benchmark_plot(benchmark: str, comp_id: int):
-                 fig = plt.figure(figsize=(10, 8))
-
-                 # Filter to just entries for this competition.
-                 df = benchmarks_df[benchmarks_df["competition_id"] == comp_id]
-
-                 plt.plot(df["timestamp"], df[benchmark])
-
-                 # Adding horizontal dotted lines for various benchmark targets (well-known models)
-                 for model, score in benchmarks_targets[benchmark].items():
-                     plt.axhline(y=score, linestyle="--", label=f"{model}")
-                     plt.text(
-                         benchmarks_df["timestamp"].max(),
-                         score,
-                         f"{model}",
-                         va="center",
-                         ha="right",
-                         backgroundcolor="white",
-                     )
-
-                 # Adding labels and title
-                 plt.ylabel(benchmark.upper())
-                 plt.title(f"{benchmark.upper()} Over Time")
-                 plt.xticks(rotation=45)
-
-                 return fig
-
-             with gr.Accordion("Top Model Benchmarks"):
-                 for comp_id in comp_ids:
-                     details = competitions.COMPETITION_DETAILS[comp_id]
-                     with gr.Accordion(f"{details.name} Benchmarks"):
-                         mmlu = create_benchmark_plot("mmlu", comp_id)
-                         mmlu_pro = create_benchmark_plot("mmlu_pro", comp_id)
-                         gr.Plot(mmlu)
-                         gr.Plot(mmlu_pro)
-                 gr.HTML(
-                     """<div>Benchmarks computed using <a href='https://github.com/EleutherAI/lm-evaluation-harness'>lm-eval harness</a></div>"""
-                 )
-                 gr.HTML(
-                     """<ul><li>MMLU: Raw score</li><li>MMLU Pro: Normalized score using <a href='https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization'>this</a> method</li></ul>"""
-                 )
-
-         with gr.Accordion("Validator Stats"):
-             gr.components.Dataframe(
-                 utils.make_validator_dataframe(validator_df, model_data),
-                 interactive=False,
-                 visible=True,
-             )
-         gr.HTML(value=get_last_updated_div())
-
-     scheduler = BackgroundScheduler()
-     scheduler.add_job(
-         restart_space, "interval", seconds=60 * 30
-     ) # restart every 15 minutes
-     scheduler.start()
-
-     demo.launch()
-
-
- main()
competitions.py DELETED
@@ -1,35 +0,0 @@
- from dataclasses import dataclass
- import html
- from typing import Dict
-
-
- @dataclass(frozen=True)
- class CompetitionDetails:
-     # The display name of the competition.
-     name: str
-
-     # The HTML description of the competition.
-     html_description: str
-
-
- # A map of competition IDs to HTML descriptions.
- COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
-     1: CompetitionDetails(
-         name="SN9_MODEL",
-         html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18.""",
-     ),
-     2: CompetitionDetails(
-         name="General Knowledge Chat-bot",
-         # TODO: Add link to SN1 dataset details.
-         html_description="""<b>Competition ID 2</b><br/>Produce the best general knowledge chat-bot. Models are evaluated using synthetic MMLU-like dataset from Subnet 1.""",
-     ),
-     3: CompetitionDetails(
-         name="General Knowledge Chat-bot (BYO tokenizer)",
-         html_description="""<b>Competition ID 3</b><br/>Produce the best general knowledge chat-bot. Models bring their own tokenizer and are evaluated using synthetic MMLU-like dataset from Subnet 1.""",
-     )
- }
-
- COMP_NAME_TO_ID = {
-     "B7_MULTI_CHOICE": 2,
-     "INSTRUCT_8B": 3,
- }
requirements.txt DELETED
@@ -1,12 +0,0 @@
- bittensor==7.3.1
- requests
- wandb==0.17.1
- numpy==1.26.4
- python-dotenv
- APScheduler
- huggingface-hub
- gradio
- pandas
- flask
- matplotlib
-
utils.py DELETED
@@ -1,505 +0,0 @@
- import argparse
- import datetime
- import functools
- import json
- import math
- import os
- import time
- import traceback
- from dataclasses import dataclass
- from typing import Any, Dict, List, Optional, Tuple
-
- import bittensor as bt
- import numpy as np
- import pandas as pd
- import wandb
- from bittensor.extrinsics.serving import get_metadata
- from dotenv import load_dotenv
- from wandb.apis.public.history import HistoryScan, SampledHistoryScan
-
- from competitions import COMP_NAME_TO_ID
-
- NETUID = 37
- DELAY_SECS = 3
- RETRIES = 3
-
- load_dotenv()
-
- WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
- SUBTENSOR_ENDPOINT = os.environ.get("SUBTENSOR_ENDPOINT", None)
- VALIDATOR_WANDB_PROJECT = "rusticluftig/finetuning"
- BENCHMARK_WANDB_PROJECT = "rusticluftig/test-benchmarks"
-
-
- @dataclass(frozen=True)
- class ModelData:
-     uid: int
-     hotkey: str
-     competition_id: int
-     namespace: str
-     name: str
-     commit: str
-
-     # Hash of (hash(model) + hotkey)
-     secure_hash: str
-     block: int
-     incentive: float
-     emission: float
-
-     @classmethod
-     def from_compressed_str(
-         cls,
-         uid: int,
-         hotkey: str,
-         cs: str,
-         block: int,
-         incentive: float,
-         emission: float,
-     ):
-         """Returns an instance of this class from a compressed string representation"""
-         tokens = cs.split(":")
-         return ModelData(
-             uid=uid,
-             hotkey=hotkey,
-             namespace=tokens[0],
-             name=tokens[1],
-             commit=tokens[2],
-             secure_hash=tokens[3],
-             competition_id=int(tokens[4]),
-             block=block,
-             incentive=incentive,
-             emission=emission,
-         )
-
-
- def run_with_retries(func, *args, **kwargs):
-     """Runs a provided function with retries in the event of a failure."""
-     for i in range(0, RETRIES):
-         try:
-             return func(*args, **kwargs)
-         except (Exception, RuntimeError):
-             print(f"Failed to run function: {traceback.format_exc()}")
-             if i == RETRIES - 1:
-                 raise
-             time.sleep(DELAY_SECS)
-     raise RuntimeError("Should never happen")
-
-
- def get_subtensor_and_metagraph() -> Tuple[bt.subtensor, bt.metagraph]:
-     """Returns a subtensor and metagraph for the finetuning subnet."""
-
-     def _internal() -> Tuple[bt.subtensor, bt.metagraph]:
-         if SUBTENSOR_ENDPOINT:
-             parser = argparse.ArgumentParser()
-             bt.subtensor.add_args(parser)
-             subtensor = bt.subtensor(
-                 config=bt.config(
-                     parser=parser,
-                     args=["--subtensor.chain_endpoint", SUBTENSOR_ENDPOINT],
-                 )
-             )
-         else:
-             subtensor = bt.subtensor("finney")
-
-         metagraph = subtensor.metagraph(NETUID, lite=False)
-
-         return subtensor, metagraph
-
-     return run_with_retries(_internal)
-
-
- def get_subnet_data(
-     subtensor: bt.subtensor, metagraph: bt.metagraph
- ) -> List[ModelData]:
-     result = []
-     for uid in metagraph.uids.tolist():
-         hotkey = metagraph.hotkeys[uid]
-         metadata = None
-         try:
-             metadata = run_with_retries(
-                 functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
-             )
-         except:
-             print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")
-
-         if not metadata:
-             continue
-
-         commitment = metadata["info"]["fields"][0]
-         hex_data = commitment[list(commitment.keys())[0]][2:]
-         chain_str = bytes.fromhex(hex_data).decode()
-         block = metadata["block"]
-
-         incentive = np.nan_to_num(metagraph.incentive[uid]).item()
-         emission = (
-             np.nan_to_num(metagraph.emission[uid]).item() * 20
-         ) # convert to daily TAO
-
-         model_data = None
-         try:
-             model_data = ModelData.from_compressed_str(
-                 uid, hotkey, chain_str, block, incentive, emission
-             )
-         except:
-             continue
-
-         result.append(model_data)
-     return result
-
-
- def get_wandb_runs(
-     project: str, filters: Dict[str, Any], order: str = "-created_at"
- ) -> List:
-     """Get the latest runs from Wandb, retrying infinitely until we get them.
-
-     Args:
-         project (str): The Wandb project to get runs from.
-         filters (Dict[str, Any]): Filters to apply to the runs.
-         order (str): Order to sort the runs by. Defaults to "-created_at" (newest first)
-
-     Returns:
-         List: List of runs matching the provided filters
-     """
-     while True:
-         api = wandb.Api(api_key=WANDB_TOKEN, timeout=100)
-         runs = list(
-             api.runs(
-                 project,
-                 filters=filters,
-                 order=order,
-             )
-         )
-         if len(runs) > 0:
-             return runs
-         # WandDB API is quite unreliable. Wait another minute and try again.
-         print("Failed to get runs from Wandb. Trying again in 60 seconds.")
-         time.sleep(60)
-
-
- def get_scores(
-     uids: List[int],
-     wandb_runs: List,
- ) -> Dict[int, Dict[str, Optional[float]]]:
-     """Returns the most recent scores for the provided UIDs.
-
-     Args:
-         uids (List[int]): List of UIDs to get scores for.
-         wandb_runs (List): List of validator runs from Wandb. Requires the runs are provided in descending order.
-     """
-     result = {}
-     previous_timestamp = None
-     seen_competitions = set()
-     # Iterate through the runs until we've processed all the uids.
-     for i, run in enumerate(wandb_runs):
-         if not "original_format_json" in run.summary:
-             continue
-         data = json.loads(run.summary["original_format_json"])
-         all_uid_data = data["uid_data"]
-         timestamp = data["timestamp"]
-         # Make sure runs are indeed in descending time order.
-         assert (
-             previous_timestamp is None or timestamp < previous_timestamp
-         ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
-         previous_timestamp = timestamp
-
-         comp_id = data.get("competition_id", None)
-         for uid in uids:
-             if uid in result:
-                 continue
-             if str(uid) in all_uid_data:
-                 uid_data = all_uid_data[str(uid)]
-                 # Only the most recent run per competition is fresh.
-                 is_fresh = comp_id not in seen_competitions
-                 result[uid] = {
-                     "avg_loss": uid_data.get("average_loss", None),
-                     "win_rate": uid_data.get("win_rate", None),
-                     "win_total": uid_data.get("win_total", None),
-                     "weight": uid_data.get("weight", None),
-                     "competition_id": uid_data.get("competition_id", None),
-                     "fresh": is_fresh,
-                 }
-         seen_competitions.add(comp_id)
-         if len(result) == len(uids):
-             break
-     return result
-
-
- def get_validator_weights(
-     metagraph: bt.metagraph,
- ) -> Dict[int, Tuple[float, int, Dict[int, float]]]:
-     """Returns a dictionary of validator UIDs to (vtrust, stake, {uid: weight})."""
-     ret = {}
-     for uid in metagraph.uids.tolist():
-         vtrust = metagraph.validator_trust[uid].item()
-         stake = metagraph.stake[uid].item()
-         if vtrust > 0 and stake > 10_000:
-             ret[uid] = (vtrust, stake, {})
-             for ouid in metagraph.uids.tolist():
-                 if ouid == uid:
-                     continue
-                 weight = round(metagraph.weights[uid][ouid].item(), 4)
-                 if weight > 0:
-                     ret[uid][-1][ouid] = weight
-     return ret
-
-
- def get_losses_over_time(wandb_runs: List, competition_id: int) -> pd.DataFrame:
-     """Returns a dataframe of the best average model loss over time."""
-     timestamps = []
-     losses = []
-
-     for run in wandb_runs:
-         # For each run, check the 10 most recent steps.
-         best_loss = math.inf
-         should_add_datapoint = False
-         min_step = max(0, run.lastHistoryStep - 10)
-         history_scan = SampledHistoryScan(
-             run.client,
-             run,
-             ["original_format_json"],
-             min_step,
-             run.lastHistoryStep,
-             page_size=10,
-         )
-         max_timestamp = None
-         for step in history_scan:
-             data = json.loads(step["original_format_json"])
-             all_uid_data = data["uid_data"]
-             timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
-             if max_timestamp is None:
-                 max_timestamp = timestamp
-             max_timestamp = max(max_timestamp, timestamp)
-
-             for _, uid_data in all_uid_data.items():
-                 loss = uid_data.get("average_loss", math.inf)
-                 c_id = uid_data.get("competition_id", None)
-                 if c_id is None or c_id != competition_id:
-                     continue
-
-                 # Filter out issue caused by wandb unavailability.
-                 if loss < 0.99 and loss < best_loss:
-                     best_loss = loss
-                     should_add_datapoint = True
-         # Now that we've processed the run's most recent steps, check if we should add a datapoint.
-         if should_add_datapoint:
-             timestamps.append(max_timestamp)
-             losses.append(best_loss)
-
-     return pd.DataFrame({"timestamp": timestamps, "losses": losses})
-
-
- def is_floatable(x) -> bool:
-     return (
-         isinstance(x, float) and not math.isnan(x) and not math.isinf(x)
-     ) or isinstance(x, int)
-
-
- def format_score(uid: int, scores, key) -> Optional[float]:
-     if uid in scores:
-         if key in scores[uid]:
-             point = scores[uid][key]
-             if is_floatable(point):
-                 return round(scores[uid][key], 4)
-     return None
-
-
- def leaderboard_data(
-     leaderboard: List[ModelData],
-     scores: Dict[int, Dict[str, Optional[float]]],
-     competition_id: int,
-     show_stale: bool,
- ) -> List[List[Any]]:
-     """Returns the leaderboard data, based on models data and UID scores."""
-     return [
-         [
-             f"[{c.namespace}/{c.name} ({c.commit[0:8]})](https://huggingface.co/{c.namespace}/{c.name}/commit/{c.commit})",
-             format_score(c.uid, scores, "win_rate"),
-             format_score(c.uid, scores, "avg_loss"),
-             format_score(c.uid, scores, "weight"),
-             c.uid,
-             c.block,
-         ]
-         for c in leaderboard
-         if c.competition_id == competition_id
-         and ((c.uid in scores and scores[c.uid]["fresh"]) or show_stale)
-     ]
-
-
- def get_benchmarks() -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]]]:
-     """Returns the latest benchmarks and the time they were run."""
-     if not BENCHMARK_WANDB_PROJECT:
-         print("No benchmark project set.")
-         return None, None
-     runs = get_wandb_runs(
-         project=BENCHMARK_WANDB_PROJECT, filters=None, order="+created_at"
-     )
-     timestamps, uids, models, comp_ids, mmlu, mmlu_pro = [], [], [], [], [], []
-     for run in runs:
-         uid = run.config.get("uid", None)
-         model = run.config.get("model", None)
-         # Any run without a competition_id was for competition 2.
-         comp_name = run.config.get("competition_id", "B7_MULTI_CHOICE")
-         comp_id = COMP_NAME_TO_ID.get(comp_name, 2)
-         if not uid or not model:
-             continue
-         samples = list(
-             HistoryScan(
-                 run.client,
-                 run,
-                 0,
-                 1,
-             )
-         )
-         if not samples:
-             continue
-         sample = samples[0]
-
-         # Make sure we have all the required keys.
-         has_all_keys = True
-         for required_key in ["mmlu.acc,none", "mmlu_pro", "_timestamp"]:
-             if required_key not in sample:
-                 has_all_keys = False
-                 break
-         if not has_all_keys:
-             continue
-
-         comp_ids.append(comp_id)
-         timestamps.append(datetime.datetime.fromtimestamp(sample["_timestamp"]))
-         mmlu.append(sample["mmlu.acc,none"])
-         mmlu_pro.append(sample["mmlu_pro"])
-         uids.append(uid)
-         models.append(model)
-     return (
-         pd.DataFrame(
-             {
-                 "timestamp": timestamps,
-                 "uid": uids,
-                 "model": models,
-                 "competition_id": comp_ids,
-                 "mmlu": mmlu,
-                 "mmlu_pro": mmlu_pro,
-             }
-         ),
-         {
-             "mmlu": {
-                 "Llama-3.1-8B-Instruct": 0.681,
-                 "Mistral-7B-Instruct-v0.3": 0.597,
-                 "gemma-2-9b-it": 0.719,
-             },
-             "mmlu_pro": {
-                 "Llama-3.1-8B-Instruct": 30.68,
-                 "Mistral-7B-Instruct-v0.3": 23.06,
-                 "gemma-2-9b-it": 31.95,
-             },
-         },
-     )
-
-
- def make_validator_dataframe(
-     validator_df: pd.DataFrame, model_data: ModelData
- ) -> pd.DataFrame:
-
-     values = [
-         [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
-         + [validator_df[uid][-1].get(c.uid) for c in model_data if c.incentive]
-         for uid, _ in sorted(
-             zip(
-                 validator_df.keys(),
-                 [validator_df[x][1] for x in validator_df.keys()],
-             ),
-             key=lambda x: x[1],
-             reverse=True,
-         )
-     ]
-     dtypes = {"UID": int, "Stake (τ)": float, "V-Trust": float}
-     dtypes.update(
-         {
-             f"{c.namespace}/{c.name} ({c.commit[0:8]})": float
-             for c in model_data
-             if c.incentive
-         }
-     )
-     return pd.DataFrame(values, columns=dtypes.keys()).astype(dtypes)
-
-
- def make_metagraph_dataframe(metagraph: bt.metagraph, weights=False) -> pd.DataFrame:
-
-     cols = [
-         "stake",
-         "emission",
-         "trust",
-         "validator_trust",
-         "dividends",
-         "incentive",
-         "R",
-         "consensus",
-         "validator_permit",
-     ]
-
-     frame = pd.DataFrame({k: getattr(metagraph, k) for k in cols})
-     frame["block"] = metagraph.block.item()
-     frame["netuid"] = NETUID
-     frame["uid"] = range(len(frame))
-     frame["hotkey"] = [axon.hotkey for axon in metagraph.axons]
-     frame["coldkey"] = [axon.coldkey for axon in metagraph.axons]
-     if weights and metagraph.W is not None:
-         # convert NxN tensor to a list of lists so it fits into the dataframe
-         frame["weights"] = [w.tolist() for w in metagraph.W]
-
-     return frame
-
-
- def load_state_vars() -> dict[Any]:
-     while True:
-         try:
-             subtensor, metagraph = get_subtensor_and_metagraph()
-
-             print(f"Loaded subtensor and metagraph: {metagraph}")
-
-             model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
-             model_data.sort(key=lambda x: x.incentive, reverse=True)
-             print(f"Loaded {len(model_data)} models")
-
-             vali_runs = get_wandb_runs(
-                 project=VALIDATOR_WANDB_PROJECT,
-                 filters={
-                     "$and": [{"config.type": "validator"}],
-                     "$or": [{"config.uid": 28}, {"config.uid": 16}],
-                 },
-             )
-             print(f"Loaded {len(vali_runs)} validator runs")
-
-             scores = get_scores([x.uid for x in model_data], vali_runs)
-             print(f"Loaded {len(scores)} scores")
-
-             validator_df = get_validator_weights(metagraph)
-             weight_keys = set()
-             for uid, stats in validator_df.items():
-                 weight_keys.update(stats[-1].keys())
-             print("Loaded validator weights")
-
-             # Compute loss over time for all competitions.
-             # losses_2 = get_losses_over_time(vali_runs, 2)
-             # print("Loaded losses over time for comp 2")
-
-             benchmarks_df, benchmarks_targets = get_benchmarks()
-             print("Loaded benchmarks")
-             break
-
-         except KeyboardInterrupt:
-             print("Exiting...")
-             break
-
-         except Exception as e:
-             print(f"Failed to get data: {traceback.format_exc()}")
-             time.sleep(30)
-
-     return {
-         "metagraph": metagraph,
-         "model_data": model_data,
-         "vali_runs": vali_runs,
-         "scores": scores,
-         "validator_df": validator_df,
-         "benchmarks_df": benchmarks_df,
-         "benchmarks_targets": benchmarks_targets,
-     }