Commit a2dcddd
Parent(s): 17bb6e0

First pass at a working leaderboard

Changed files:
- .gitignore +3 -0
- app.py +7 -9
- competitions.py +23 -0
- requirements.txt +2 -1
- utils.py +47 -18
.gitignore ADDED
@@ -0,0 +1,3 @@
+.venv
+__pycache__/
+.env
app.py CHANGED
@@ -9,6 +9,7 @@ from dotenv import load_dotenv
 from huggingface_hub import HfApi
 from apscheduler.schedulers.background import BackgroundScheduler
 
+import competitions
 import utils
 
 FONT = (
@@ -20,11 +21,6 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/ma
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 
-# A map of competition IDs to HTML descriptions.
-COMPETITION_DETAILS: Dict[int, str] = {
-    1: """<b>Competition ID 1:</b> Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
-}
-
 HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
 SECONDS_PER_BLOCK = 12
 
@@ -93,8 +89,9 @@ def main():
         show_stale = gr.Checkbox(label="Show Stale", interactive=True)
         competition_leaderboards = []
         # TODO: Dynamically generate per-competition leaderboards based on model_data.
-
-
+        competition_details = competitions.COMPETITION_DETAILS[1]
+        with gr.Accordion(f"{competition_details.name} competition"):
+            gr.HTML(competition_details.html_description)
         competition_leaderboards.append(gr.components.Dataframe(
             value=utils.leaderboard_data(model_data, scores, show_stale.value),
             headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
@@ -110,13 +107,14 @@ def main():
             outputs=competition_leaderboards,
         )
 
+        # TODO: Make this a multi-competition line plot
         gr.LinePlot(
             utils.get_losses_over_time(vali_runs),
             x="timestamp",
             x_title="Date",
-            y="
+            y="SN9_MODEL",
             y_title="Average Loss",
-            tooltip="
+            tooltip="SN9_MODEL",
             interactive=True,
             visible=True,
             width=1024,
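The TODO in the hunk above ("Dynamically generate per-competition leaderboards") points at the obvious follow-up: replace the hard-coded lookup of competition 1 with a loop over the registry. A minimal sketch of that loop, assuming a hypothetical competition_id filter is added to utils.leaderboard_data (the current signature takes no such argument):

import gradio as gr

import competitions
import utils

def build_competition_leaderboards(model_data, scores, show_stale: bool):
    """Builds one accordion plus leaderboard per registered competition."""
    leaderboards = []
    for comp_id, details in competitions.COMPETITION_DETAILS.items():
        with gr.Accordion(f"{details.name} competition"):
            gr.HTML(details.html_description)
            leaderboards.append(gr.components.Dataframe(
                # competition_id is a hypothetical extension, not in utils.py yet.
                value=utils.leaderboard_data(model_data, scores, show_stale,
                                             competition_id=comp_id),
                headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
            ))
    return leaderboards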
competitions.py ADDED
@@ -0,0 +1,23 @@
+
+
+
+from dataclasses import dataclass
+from typing import Dict
+
+
+@dataclass(frozen=True)
+class CompetitionDetails:
+    # The display name of the competition.
+    name: str
+
+    # The HTML description of the competition.
+    html_description: str
+
+
+# A map of competition IDs to competition details.
+COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
+    1: CompetitionDetails(
+        name="SN9_MODEL",
+        html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
+    )
+}
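For reference, a small usage sketch of the new registry: entries are keyed by the integer competition ID, and frozen=True makes CompetitionDetails instances immutable after construction.

import dataclasses

import competitions

details = competitions.COMPETITION_DETAILS[1]
print(details.name)              # "SN9_MODEL"
print(details.html_description)  # the HTML rendered in the app.py accordion

# frozen=True rejects attribute assignment after construction.
try:
    details.name = "OTHER"
except dataclasses.FrozenInstanceError:
    print("CompetitionDetails is immutable")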
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 bittensor
 requests
-wandb
+wandb==0.17.1
+numpy==1.26.4
 python-dotenv
 APScheduler
 huggingface-hub
utils.py CHANGED
@@ -6,7 +6,9 @@ import math
 import os
 import time
 import traceback
+from collections import defaultdict
 from dataclasses import dataclass
+from email.policy import default
 from typing import Any, Dict, List, Optional, Tuple
 
 import bittensor as bt
@@ -15,6 +17,9 @@ import pandas as pd
 import wandb
 from bittensor.extrinsics.serving import get_metadata
 from dotenv import load_dotenv
+from wandb.apis.public.history import HistoryScan
+
+import competitions
 
 # TODO: Update once registered
 NETUID = 179
@@ -235,24 +240,48 @@ def get_validator_weights(
 def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
     """Returns a dataframe of the best average model loss over time."""
     timestamps = []
-
-
+    datapoints_per_comp_id = {id: [] for id in competitions.COMPETITION_DETAILS}
+
     for run in wandb_runs:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # For each run, check the 10 most recent steps.
+        best_loss_per_competition_id = defaultdict(lambda: math.inf)
+        should_add_datapoint = False
+        min_step = max(0, run.lastHistoryStep - 10)
+        history_scan = HistoryScan(
+            run.client, run, min_step, run.lastHistoryStep, page_size=10
+        )
+        max_timestamp = None
+        for step in history_scan:
+            if "original_format_json" not in step:
+                continue
+            data = json.loads(step["original_format_json"])
+            all_uid_data = data["uid_data"]
+            timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+            if max_timestamp is None:
+                max_timestamp = timestamp
+            max_timestamp = max(max_timestamp, timestamp)
+
+            for _, uid_data in all_uid_data.items():
+                loss = uid_data.get("average_loss", math.inf)
+                competition_id = uid_data.get("competition_id", None)
+                if not competition_id:
+                    continue
+
+                if loss < best_loss_per_competition_id[competition_id]:
+                    best_loss_per_competition_id[competition_id] = uid_data["average_loss"]
+                    should_add_datapoint = True
+        # Now that we've processed the run's most recent steps, check if we should add a datapoint.
+        if should_add_datapoint:
+            timestamps.append(max_timestamp)
+            # Iterate through all possible competitions and add the best loss for each.
+            # Set None for any that aren't active during this run.
+            for id, losses in datapoints_per_comp_id.items():
+                losses.append(best_loss_per_competition_id.get(id, None))
+
+    # Create a dictionary of competitions to lists of losses.
+    output_columns = {competitions.COMPETITION_DETAILS[id].name: losses for id, losses in datapoints_per_comp_id.items()}
+
+    return pd.DataFrame({"timestamp": timestamps, **output_columns})
 
 
 def next_epoch(subtensor: bt.subtensor, block: int) -> int:
@@ -384,7 +413,7 @@ def load_state_vars() -> dict[Any]:
     vali_runs = get_wandb_runs(
         project=VALIDATOR_WANDB_PROJECT,
         # TODO: Update to point to the OTF vali on finetuning
-        filters={"config.type": "validator", "config.uid":
+        filters={"config.type": "validator", "config.uid": 0},
    )
 
    scores = get_scores([x.uid for x in model_data], vali_runs)
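The rewritten get_losses_over_time returns one "timestamp" column plus one loss column per registered competition, named after CompetitionDetails.name and holding None wherever a competition was inactive for a run. That column name is what the updated gr.LinePlot call in app.py selects with y="SN9_MODEL"; as more competitions are registered, the frame simply gains more columns (hence the "multi-competition line plot" TODO). An illustrative sketch of the expected shape, with invented loss values:

import datetime

import pandas as pd

# Shape of the frame get_losses_over_time() builds: "timestamp" plus one
# column per competition, keyed by CompetitionDetails.name. The loss values
# below are made up purely to show the structure.
df = pd.DataFrame({
    "timestamp": [
        datetime.datetime(2024, 6, 1, 12, 0),
        datetime.datetime(2024, 6, 2, 12, 0),
    ],
    "SN9_MODEL": [2.31, 2.27],  # best average_loss per datapoint; None if inactive
})

# app.py then plots it as:
#   gr.LinePlot(df, x="timestamp", y="SN9_MODEL", tooltip="SN9_MODEL", ...)
print(df)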