Skip to content

Commit

Permalink
Merge pull request #129 from macrocosm-os/dev
Browse files Browse the repository at this point in the history
Release 3.2.1 (Bugfix)
  • Loading branch information
cryptal-mc authored Jun 25, 2024
2 parents cda4dfe + 0c86f95 commit e2ff6ae
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 105 deletions.
21 changes: 4 additions & 17 deletions constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# ---------------------------------

# Release
__version__ = "3.2.0"
__version__ = "3.2.1"

# Validator schema version
__validator_version__ = "2.2.2"
Expand All @@ -31,8 +31,10 @@

# The validator WANDB project.
WANDB_PROJECT = "pretraining-subnet"

# The uid for this subnet.
SUBNET_UID = 9

# The root directory of this project.
ROOT_DIR = Path(__file__).parent.parent

Expand All @@ -41,7 +43,7 @@
BLOCK_7B = 2_786_061

# Block at which FineWeb edu score 2 dataset is used for evaluation
BLOCK_FW_EDU_SCORE_2 = 3_256_604
BLOCK_FW_EDU_SCORE_2 = 3_307_004

# FIXING MODEL CRITERIA

Expand Down Expand Up @@ -86,7 +88,6 @@
max_model_parameters=186_000_000,
allowed_model_types=ALLOWED_MODEL_TYPES_1,
tokenizer_identifier=TokenizerIdentifier.DISTILGPT_2,
evaluation_dataset=DATASET_1,
),
),
(
Expand All @@ -98,7 +99,6 @@
max_model_parameters=772_000_000,
allowed_model_types=ALLOWED_MODEL_TYPES_1,
tokenizer_identifier=TokenizerIdentifier.DISTILGPT_2,
evaluation_dataset=DATASET_1,
),
),
(
Expand All @@ -110,19 +110,6 @@
max_model_parameters=6_900_000_000,
allowed_model_types=ALLOWED_MODEL_TYPES_2,
tokenizer_identifier=TokenizerIdentifier.GPT_4_TIKTOKEN,
evaluation_dataset=DATASET_1,
),
),
(
BLOCK_FW_EDU_SCORE_2,
ModelCriteria(
sequence_length=SEQUENCE_LENGTH_2,
optimized=True,
max_model_bytes=15 * 1024 * 1024 * 1024,
max_model_parameters=6_900_000_000,
allowed_model_types=ALLOWED_MODEL_TYPES_2,
tokenizer_identifier=TokenizerIdentifier.GPT_4_TIKTOKEN,
evaluation_dataset=DATASET_2,
),
),
]
Expand Down
4 changes: 1 addition & 3 deletions model/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,4 @@ class ModelCriteria:

# Tokenizer to use.
tokenizer_identifier: TokenizerIdentifier

# Evaluation dataset
evaluation_dataset: str

2 changes: 1 addition & 1 deletion model/storage/chain/chain_model_metadata_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ async def retrieve_model_metadata(self, hotkey: str) -> Optional[ModelMetadata]:
bt.extrinsics.serving.get_metadata, self.subtensor, self.subnet_uid, hotkey
)

metadata = utils.run_in_subprocess(partial, 60)
metadata = utils.run_in_subprocess(partial, 180)

if not metadata:
return None
Expand Down
100 changes: 49 additions & 51 deletions neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(self):
self.new_wandb_run()

# === Running args ===
self.weights = torch.zeros_like(self.metagraph.S)
self.weights = torch.zeros_like(torch.tensor(self.metagraph.S))
self.epoch_step = 0
self.global_step = 0
self.last_epoch = self.metagraph.block.item()
Expand Down Expand Up @@ -524,54 +524,53 @@ async def run_step(self):
# Default to an infinite block if we can't retrieve the metadata for the miner.
uid_to_block = defaultdict(lambda: math.inf)

# Generate random pages for evaluation and prepare batches for each page.
# The dataset contains more than 900 million pages to evaluate over.
bt.logging.trace(f'Current block: {self.current_block}')

old_pages = [
random.randint(1, pt.dataset.SubsetFalconLoader.max_pages)
for _ in range(self.config.pages_per_eval)
]
# Decide on which dataset loader class to use
if self.current_block >= constants.BLOCK_FW_EDU_SCORE_2:
bt.logging.trace(f'Dataset in use: {constants.DATASET_2}.')
SubsetDataLoader = pt.dataset.SubsetFineWebEdu2Loader
else:
bt.logging.trace(f'Dataset in use: {constants.DATASET_1}.')
SubsetDataLoader = pt.dataset.SubsetFalconLoader

# Temporary ugliness to load the batches with both the previous tokenizer
# and the new tokenizer. batches_old can be removed once the block is newer
# than the point we allow 7B parameter models.
# old_tokenizer = pt.model.get_old_tokenizer(cache_dir=self.config.model_dir)
# batches_old = list(
# pt.dataset.SubsetFalconLoader(
# batch_size=constants.batch_size,
# sequence_length=constants.SEQUENCE_LENGTH_1,
# pages=pages,
# tokenizer=old_tokenizer,
# )
# )

new_tokenizer = pt.model.get_tokenizer(cache_dir=self.config.model_dir)
old_batches = list(
pt.dataset.SubsetFalconLoader(

## First tokenizer (Prior to 7B models)
tokenizer_old = pt.model.get_old_tokenizer(cache_dir=self.config.model_dir)
dataloader_old = SubsetDataLoader(
batch_size=constants.batch_size,
sequence_length=constants.SEQUENCE_LENGTH_2,
pages=old_pages,
tokenizer=new_tokenizer,
sequence_length=constants.SEQUENCE_LENGTH_1,
num_pages=self.config.pages_per_eval, # The pages will be sampled inside the object
tokenizer=tokenizer_old,
)

batches_old = list(
dataloader_old
)

# This is useful for logging to wandb
pages = dataloader_old.get_page_names()

new_dataset = pt.dataset.SubsetFineWebEdu2Loader(
## Second tokenizer (For 7B models)
tokenizer_new = pt.model.get_tokenizer(cache_dir=self.config.model_dir)
dataloader_new = SubsetDataLoader(
batch_size=constants.batch_size,
sequence_length=constants.SEQUENCE_LENGTH_2,
num_pages=self.config.pages_per_eval,
tokenizer=new_tokenizer,
)

new_batches = list(
new_dataset
num_pages=None, # Do not automatically generate pages. They will be manually set.
tokenizer=tokenizer_new,
)

# Use the same pages as for models with old tokenizers
dataloader_new.fetch_data_for_pages(pages=dataloader_old.pages)

batches_new = list(
dataloader_new
)

# This is useful for logging to wandb
new_pages = [f'{cfg_name}_{num_rows}_{split}' for
cfg_name, num_rows, split in new_dataset.pages]

# bt.logging.debug(f"Computing losses on {uids} with pages {pages}")
bt.logging.debug(f"Computing losses on {uids} with pages {pages}")

# Compute model losses on batches.
losses_per_uid = {muid: None for muid in uids}
Expand All @@ -588,8 +587,8 @@ async def run_step(self):
hotkey
)

old_losses = [math.inf for _ in range(len(old_batches))]
new_losses = [math.inf for _ in range(len(new_batches))]
# This variable should be overwritten below if the model has metadata.
losses = [math.inf for _ in range(len(batches_new))]

if model_i_metadata != None:
try:
Expand All @@ -601,8 +600,6 @@ async def run_step(self):
optimized = criteria.optimized
# Use tokenizer based on block.
tokenizer_identifier = criteria.tokenizer_identifier
# datasets to use based on block.
dataset = criteria.evaluation_dataset

# Get the model locally and evaluate its loss.
model_i = None
Expand All @@ -616,18 +613,17 @@ async def run_step(self):
with compute_loss_perf.sample():
# Run each computation in a subprocess so that the GPU is reset between each model.
batches_to_use = None

# Keep the existing behavior of taking the pad token id from the EOS token id.
# Currently the get-tokenizer methods set pad token = eos token, but they do not set the ids.
pad_token_id = new_tokenizer.eos_token_id
pad_token_id = None

if dataset == constants.DATASET_1:
batches_to_use = old_batches
losses = old_losses
pages_to_use = old_pages
if tokenizer_identifier == TokenizerIdentifier.DISTILGPT_2:
batches_to_use = batches_old
pad_token_id = tokenizer_old.eos_token_id
else:
batches_to_use = new_batches
losses = new_losses
pages_to_use = new_pages
batches_to_use = batches_new
pad_token_id = tokenizer_new.eos_token_id

losses = utils.run_in_subprocess(
functools.partial(
Expand Down Expand Up @@ -658,7 +654,7 @@ async def run_step(self):

# Compute wins and win rates per uid.
wins, win_rate = pt.validation.compute_wins(
uids, losses_per_uid, batches_to_use, uid_to_block
uids, losses_per_uid, batches_new, uid_to_block
)

# Compute softmaxed weights based on win rate.
Expand Down Expand Up @@ -707,7 +703,7 @@ async def run_step(self):
self.log_step(
uids,
uid_to_block,
pages_to_use,
pages,
wins,
win_rate,
losses_per_uid,
Expand Down Expand Up @@ -818,10 +814,12 @@ async def run(self):
"""Runs the validator loop, which continuously evaluates models and sets weights."""
while True:
try:

while (
self.metagraph.block.item() - self.last_epoch
< self.config.blocks_per_epoch
(self.metagraph.block.item() - self.last_epoch)
< self.config.blocks_per_epoch
):
self.current_block = self.metagraph.block.item()
await self.try_run_step(ttl=60 * 20)
await self.try_sync_metagraph(ttl=60)
self.save_state()
Expand Down
Loading

0 comments on commit e2ff6ae

Please sign in to comment.