Skip to content

Commit

Permalink
Merge pull request #158 from macrocosm-os/dev
Browse files: browse the repository at this point in the history
Release 4.1.2
  • Loading branch information
cryptal-mc committed Aug 23, 2024
2 parents cb888b4 + f10dd01 commit 72e772c
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 34 deletions.
12 changes: 5 additions & 7 deletions constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,17 @@
GPTNeoXForCausalLM,
GPTJForCausalLM,
PhiForCausalLM,
Phi3ForCausalLM,
GemmaForCausalLM,
Gemma2ForCausalLM,
Qwen2ForCausalLM,
StableLmForCausalLM,
)

from taoverse.model.competition.data import (
Competition,
ModelConstraints,
NormValidationConstraints,
)
from taoverse.model.competition.epsilon import FixedEpsilon
from competitions.data import CompetitionId

from typing import Dict, List, Tuple
Expand All @@ -35,7 +34,7 @@
# ---------------------------------

# Release
__version__ = "4.1.0"
__version__ = "4.1.2"

# Validator schema version
__validator_version__ = "3.0.0"
Expand Down Expand Up @@ -75,8 +74,6 @@
FalconForCausalLM,
GPTNeoXForCausalLM,
GPTJForCausalLM,
StableLmForCausalLM,
Phi3ForCausalLM,
Qwen2ForCausalLM,
}
ALLOWED_MODEL_TYPES_2 = {
Expand All @@ -88,8 +85,6 @@
PhiForCausalLM,
GemmaForCausalLM,
Gemma2ForCausalLM,
StableLmForCausalLM,
Phi3ForCausalLM,
Qwen2ForCausalLM,
}

Expand All @@ -109,6 +104,7 @@
allowed_architectures=ALLOWED_MODEL_TYPES_1,
tokenizer="distilgpt2",
eval_block_delay=0,
epsilon_func=FixedEpsilon(0.005),
),
CompetitionId.B7_MODEL: ModelConstraints(
max_model_parameter_size=6_900_000_000,
Expand All @@ -121,6 +117,7 @@
"attn_implementation": "flash_attention_2",
},
eval_block_delay=0,
epsilon_func=FixedEpsilon(0.005),
),
CompetitionId.B3_MODEL: ModelConstraints(
max_model_parameter_size=3_400_000_000,
Expand All @@ -133,6 +130,7 @@
"attn_implementation": "flash_attention_2",
},
eval_block_delay=0,
epsilon_func=FixedEpsilon(0.005),
),
}

Expand Down
6 changes: 6 additions & 0 deletions neurons/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ def validator_config():
action="store_false",
help="Turn off wandb logging.",
)
parser.add_argument(
"--wandb_project",
type=str,
default=constants.WANDB_PROJECT,
help="The wandb project to log to.",
)
parser.add_argument(
"--blocks_per_epoch",
type=int,
Expand Down
74 changes: 50 additions & 24 deletions neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import constants
from taoverse.metagraph import utils as metagraph_utils
from taoverse.metagraph.metagraph_syncer import MetagraphSyncer
from taoverse.model import utils as model_utils
from taoverse.model.competition import utils as competition_utils
from taoverse.model.competition.competition_tracker import CompetitionTracker
from taoverse.model.competition.data import Competition
Expand Down Expand Up @@ -278,7 +279,7 @@ def _new_wandb_run(self):
name = "validator-" + str(self.uid) + "-" + run_id
self.wandb_run = wandb.init(
name=name,
project=constants.WANDB_PROJECT,
project=self.config.wandb_project,
entity="macrocosmos",
config={
"uid": self.uid,
Expand Down Expand Up @@ -702,9 +703,12 @@ async def run_step(self):
time.sleep(300)
return

# TODO: Consider condensing the following + competition id into a uid to metadata map.
# Keep track of which block this uid last updated their model.
# Default to an infinite block if we can't retrieve the metadata for the miner.
uid_to_block = defaultdict(lambda: math.inf)
# Keep track of the hugging face repo for this uid.
uid_to_hf = defaultdict(lambda: "unknown")

bt.logging.trace(f"Current block: {cur_block}")

Expand All @@ -725,21 +729,25 @@ async def run_step(self):
pages_per_eval = constants.pages_per_eval_pack

# If the option is set in the config, override
pages_per_eval = self.config.pages_per_eval if self.config.pages_per_eval is not None else pages_per_eval
pages_per_eval = (
self.config.pages_per_eval
if self.config.pages_per_eval is not None
else pages_per_eval
)

bt.logging.debug(f'Sample packing is set to: {pack_samples}.')
bt.logging.debug(f'Number of pages per evaluation step is: {pages_per_eval}')
bt.logging.debug(f"Sample packing is set to: {pack_samples}.")
bt.logging.debug(f"Number of pages per evaluation step is: {pages_per_eval}")

dataloader = SubsetDataLoader(
batch_size=constants.batch_size,
sequence_length=competition.constraints.sequence_length,
num_pages= pages_per_eval,
num_pages=pages_per_eval,
tokenizer=tokenizer,
pack_samples=pack_samples
)
pack_samples=pack_samples,
)

batches = list(dataloader)
bt.logging.debug(f'Number of validation batches is {len(batches)}')
bt.logging.debug(f"Number of validation batches is {len(batches)}")

# This is useful for logging to wandb
pages = dataloader.get_page_names()
Expand All @@ -758,11 +766,11 @@ async def run_step(self):
compute_loss_perf = PerfMonitor("Eval: Compute loss")

for uid_i in uids:
bt.logging.trace(f"Computing model losses for uid:{uid_i}.")

# This variable should be overwritten below if the model has metadata.
losses: typing.List[float] = [math.inf for _ in range(len(batches))]

bt.logging.trace(f"Getting metadata for uid: {uid_i}.")

# Check that the model is in the tracker.
with self.metagraph_lock:
hotkey = self.metagraph.hotkeys[uid_i]
Expand All @@ -776,8 +784,14 @@ async def run_step(self):
and model_i_metadata.id.competition_id == competition.id
):
try:
bt.logging.info(
f"Evaluating uid: {uid_i} / hotkey: {hotkey} with metadata: {model_i_metadata} and hf_url: {model_utils.get_hf_url(model_i_metadata)}."
)

# Update the block this uid last updated their model.
uid_to_block[uid_i] = model_i_metadata.block
# Update the hf repo for this model.
uid_to_hf[uid_i] = model_utils.get_hf_repo_name(model_i_metadata)

# Get the model locally and evaluate its loss.
model_i = None
Expand All @@ -795,7 +809,7 @@ async def run_step(self):
batches,
self.config.device,
tokenizer.eos_token_id,
pack_samples
pack_samples,
),
ttl=400,
mode="spawn",
Expand Down Expand Up @@ -879,6 +893,7 @@ async def run_step(self):
CompetitionId.B7_MODEL_LOWER_EPSILON,
uids,
uid_to_block,
uid_to_hf,
uids_to_competition_ids_epsilon_experiment,
pages,
model_weights_epsilon_experiment,
Expand Down Expand Up @@ -912,7 +927,9 @@ async def run_step(self):
# If the model has any significant weight, prioritize by weight with greater weights being kept first.
# Then for the unweighted models, prioritize by win_rate.
# Use the competition weights from the tracker which also handles moving averages.
tracker_competition_weights = self.competition_tracker.get_competition_weights(competition.id)
tracker_competition_weights = self.competition_tracker.get_competition_weights(
competition.id
)
model_prioritization = {
uid: (
# Add 1 to ensure it is always greater than a win rate.
Expand Down Expand Up @@ -942,6 +959,7 @@ async def run_step(self):
competition.id,
uids,
uid_to_block,
uid_to_hf,
self._get_uids_to_competition_ids(),
pages,
model_weights,
Expand All @@ -952,7 +970,6 @@ async def run_step(self):
compute_loss_perf,
)


# Increment the number of completed run steps by 1
self.run_step_count += 1

Expand All @@ -961,6 +978,7 @@ def log_step(
competition_id: CompetitionId,
uids: typing.List[int],
uid_to_block: typing.Dict[int, int],
uid_to_hf: typing.Dict[int, str],
uid_to_competition_id: typing.Dict[int, typing.Optional[int]],
pages: typing.List[str],
model_weights: typing.List[float],
Expand All @@ -981,32 +999,37 @@ def log_step(
}

# The sub-competition weights
sub_competition_weights = torch.softmax(model_weights / constants.temperature, dim=0)
sub_competition_weights = torch.softmax(
model_weights / constants.temperature, dim=0
)

for idx, uid in enumerate(uids):
step_log["uid_data"][str(uid)] = {
"uid": uid,
"block": uid_to_block[uid],
"hf": uid_to_hf[uid],
"competition_id": uid_to_competition_id[uid],
"average_loss": sum(losses_per_uid[uid]) / len(losses_per_uid[uid]),
"win_rate": win_rate[uid],
"win_total": wins[uid],
"weight": self.weights[uid].item(),
"norm_weight": sub_competition_weights[idx].item(),
}
table = Table(title="Step")
table = Table(title="Step", expand=True)
table.add_column("uid", justify="right", style="cyan", no_wrap=True)
table.add_column("average_loss", style="magenta")
table.add_column("win_rate", style="magenta")
table.add_column("win_total", style="magenta")
table.add_column("weights", style="magenta")
table.add_column("competition_weights", style="magenta")
table.add_column("block", style="magenta")
table.add_column("competition", style="magenta")
table.add_column("hf", style="magenta", overflow="fold")
table.add_column("average_loss", style="magenta", overflow="fold")
table.add_column("win_rate", style="magenta", overflow="fold")
table.add_column("win_total", style="magenta", overflow="fold")
table.add_column("total_weight", style="magenta", overflow="fold")
table.add_column("comp_weight", style="magenta", overflow="fold")
table.add_column("block", style="magenta", overflow="fold")
table.add_column("comp", style="magenta", overflow="fold")
for idx, uid in enumerate(uids):
try:
table.add_row(
str(uid),
str(step_log["uid_data"][str(uid)]["hf"]),
str(round(step_log["uid_data"][str(uid)]["average_loss"], 4)),
str(round(step_log["uid_data"][str(uid)]["win_rate"], 4)),
str(step_log["uid_data"][str(uid)]["win_total"]),
Expand Down Expand Up @@ -1069,7 +1092,10 @@ def log_step(
str(uid): uid_data[str(uid)]["win_total"] for uid in uids
},
"weight_data": {str(uid): self.weights[uid].item() for uid in uids},
"norm_weight_data": {str(uid): sub_competition_weights[i].item() for i, uid in enumerate(uids)},
"competition_weight_data": {
str(uid): sub_competition_weights[i].item()
for i, uid in enumerate(uids)
},
"competition_id": {
str(uid): uid_to_competition_id[uid]
for uid in uids
Expand All @@ -1094,7 +1120,7 @@ def log_step(
step=self.last_wandb_step,
)

self.last_wandb_step+=1
self.last_wandb_step += 1

def _get_uids_to_competition_ids(
self,
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
bittensor==6.9.3
bittensor==6.9.4
huggingface_hub
matplotlib
pydantic==1.10
Expand All @@ -7,8 +7,8 @@ rich
safetensors
torch
numpy
transformers==4.42.0
transformers==4.44.1
wandb
datasets
flash-attn
taoverse==1.0.1
taoverse==1.0.2

0 comments on commit 72e772c

Please sign in to comment.