From 3a41abba77a332459987734e15d679ab093fade8 Mon Sep 17 00:00:00 2001
From: Sid
Date: Sun, 18 Aug 2024 15:11:50 -0700
Subject: [PATCH 01/10] Log metadata in eval loop.

---
 neurons/validator.py | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/neurons/validator.py b/neurons/validator.py
index 376ddbe..bbc1668 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -725,21 +725,25 @@ async def run_step(self):
             pages_per_eval = constants.pages_per_eval_pack

         # If the option is set in the config, override
-        pages_per_eval = self.config.pages_per_eval if self.config.pages_per_eval is not None else pages_per_eval
+        pages_per_eval = (
+            self.config.pages_per_eval
+            if self.config.pages_per_eval is not None
+            else pages_per_eval
+        )

-        bt.logging.debug(f'Sample packing is set to: {pack_samples}.')
-        bt.logging.debug(f'Number of pages per evaluation step is: {pages_per_eval}')
+        bt.logging.debug(f"Sample packing is set to: {pack_samples}.")
+        bt.logging.debug(f"Number of pages per evaluation step is: {pages_per_eval}")

         dataloader = SubsetDataLoader(
             batch_size=constants.batch_size,
             sequence_length=competition.constraints.sequence_length,
-            num_pages= pages_per_eval,
+            num_pages=pages_per_eval,
             tokenizer=tokenizer,
-            pack_samples=pack_samples
-            )
+            pack_samples=pack_samples,
+        )

         batches = list(dataloader)
-        bt.logging.debug(f'Number of validation batches is {len(batches)}')
+        bt.logging.debug(f"Number of validation batches is {len(batches)}")

         # This is useful for logging to wandb
         pages = dataloader.get_page_names()
@@ -758,11 +762,11 @@ async def run_step(self):
         compute_loss_perf = PerfMonitor("Eval: Compute loss")

         for uid_i in uids:
-            bt.logging.trace(f"Computing model losses for uid:{uid_i}.")
-
             # This variable should be overwritten below if the model has metadata.
             losses: typing.List[float] = [math.inf for _ in range(len(batches))]

+            bt.logging.trace(f"Getting metadata for uid: {uid_i}.")
+
             # Check that the model is in the tracker.
             with self.metagraph_lock:
                 hotkey = self.metagraph.hotkeys[uid_i]
@@ -776,6 +780,10 @@ async def run_step(self):
                 and model_i_metadata.id.competition_id == competition.id
             ):
                 try:
+                    bt.logging.info(
+                        f"Evaluating uid: {uid_i} / hotkey: {hotkey} with metadata: {model_i_metadata}."
+                    )
+
                     # Update the block this uid last updated their model.
                     uid_to_block[uid_i] = model_i_metadata.block

@@ -795,7 +803,7 @@ async def run_step(self):
                                 batches,
                                 self.config.device,
                                 tokenizer.eos_token_id,
-                                pack_samples
+                                pack_samples,
                             ),
                             ttl=400,
                             mode="spawn",
@@ -912,7 +920,9 @@ async def run_step(self):
         # If the model has any significant weight, prioritize by weight with greater weights being kept first.
         # Then for the unweighted models, prioritize by win_rate.
         # Use the competition weights from the tracker which also handles moving averages.
-        tracker_competition_weights = self.competition_tracker.get_competition_weights(competition.id)
+        tracker_competition_weights = self.competition_tracker.get_competition_weights(
+            competition.id
+        )
         model_prioritization = {
             uid: (
                 # Add 1 to ensure it is always greater than a win rate.
@@ -952,7 +962,6 @@ async def run_step(self):
             compute_loss_perf,
         )

-
         # Increment the number of completed run steps by 1
         self.run_step_count += 1

@@ -981,7 +990,9 @@ def log_step(
         }

         # The sub-competition weights
-        sub_competition_weights = torch.softmax(model_weights / constants.temperature, dim=0)
+        sub_competition_weights = torch.softmax(
+            model_weights / constants.temperature, dim=0
+        )

         for idx, uid in enumerate(uids):
             step_log["uid_data"][str(uid)] = {
@@ -1069,7 +1080,10 @@ def log_step(
                 str(uid): uid_data[str(uid)]["win_total"] for uid in uids
             },
             "weight_data": {str(uid): self.weights[uid].item() for uid in uids},
-            "norm_weight_data": {str(uid): sub_competition_weights[i].item() for i, uid in enumerate(uids)},
+            "norm_weight_data": {
+                str(uid): sub_competition_weights[i].item()
+                for i, uid in enumerate(uids)
+            },
             "competition_id": {
                 str(uid): uid_to_competition_id[uid] for uid in uids
@@ -1094,7 +1108,7 @@ def log_step(
                 step=self.last_wandb_step,
             )

-        self.last_wandb_step+=1
+        self.last_wandb_step += 1

     def _get_uids_to_competition_ids(
         self,

From 607b243a7c2428c29e986586c305b536551c0a85 Mon Sep 17 00:00:00 2001
From: Sid
Date: Sun, 18 Aug 2024 15:28:31 -0700
Subject: [PATCH 02/10] Also construct hf_url for the log.

---
 neurons/validator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neurons/validator.py b/neurons/validator.py
index bbc1668..662e370 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -33,6 +33,7 @@
 import constants
 from taoverse.metagraph import utils as metagraph_utils
 from taoverse.metagraph.metagraph_syncer import MetagraphSyncer
+from taoverse.model import utils as model_utils
 from taoverse.model.competition import utils as competition_utils
 from taoverse.model.competition.competition_tracker import CompetitionTracker
 from taoverse.model.competition.data import Competition
@@ -781,7 +782,7 @@ async def run_step(self):
             ):
                 try:
                     bt.logging.info(
-                        f"Evaluating uid: {uid_i} / hotkey: {hotkey} with metadata: {model_i_metadata}."
+                        f"Evaluating uid: {uid_i} / hotkey: {hotkey} with metadata: {model_i_metadata} and hf_url: {model_utils.get_hf_url(model_i_metadata)}."
                     )

                     # Update the block this uid last updated their model.

From bdb2a3b1e2e3fd3a85726c20c2a6f6c56c2df3df Mon Sep 17 00:00:00 2001
From: Sid
Date: Sun, 18 Aug 2024 18:44:37 -0700
Subject: [PATCH 03/10] Also include hf_url in the step_log.

---
 neurons/validator.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/neurons/validator.py b/neurons/validator.py
index 662e370..77b5173 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -703,9 +703,12 @@ async def run_step(self):
             time.sleep(300)
             return

+        # TODO: Consider condensing the following + competition id into a uid to metadata map.
         # Keep track of which block this uid last updated their model.
         # Default to an infinite block if we can't retrieve the metadata for the miner.
         uid_to_block = defaultdict(lambda: math.inf)
+        # Keep track of the hugging face url for this uid.
+        uid_to_hf_url = defaultdict(lambda: "unknown")

         bt.logging.trace(f"Current block: {cur_block}")

@@ -787,6 +790,8 @@ async def run_step(self):

                     # Update the block this uid last updated their model.
                     uid_to_block[uid_i] = model_i_metadata.block
+                    # Update the hf url for this model.
+                    uid_to_hf_url[uid_i] = model_utils.get_hf_url(model_i_metadata)

                     # Get the model locally and evaluate its loss.
                     model_i = None
@@ -888,6 +893,7 @@ async def run_step(self):
             CompetitionId.B7_MODEL_LOWER_EPSILON,
             uids,
             uid_to_block,
+            uid_to_hf_url,
             uids_to_competition_ids_epsilon_experiment,
             pages,
             model_weights_epsilon_experiment,
@@ -953,6 +959,7 @@ async def run_step(self):
             competition.id,
             uids,
             uid_to_block,
+            uid_to_hf_url,
             self._get_uids_to_competition_ids(),
             pages,
             model_weights,
@@ -971,6 +978,7 @@ def log_step(
         competition_id: CompetitionId,
         uids: typing.List[int],
         uid_to_block: typing.Dict[int, int],
+        uid_to_hf_url: typing.Dict[int, str],
         uid_to_competition_id: typing.Dict[int, typing.Optional[int]],
         pages: typing.List[str],
         model_weights: typing.List[float],
@@ -999,6 +1007,7 @@ def log_step(
             step_log["uid_data"][str(uid)] = {
                 "uid": uid,
                 "block": uid_to_block[uid],
+                "hf_url": uid_to_hf_url[uid],
                 "competition_id": uid_to_competition_id[uid],
                 "average_loss": sum(losses_per_uid[uid]) / len(losses_per_uid[uid]),
                 "win_rate": win_rate[uid],
@@ -1015,6 +1024,7 @@ def log_step(
         table.add_column("competition_weights", style="magenta")
         table.add_column("block", style="magenta")
         table.add_column("competition", style="magenta")
+        table.add_column("hugging_face_url", style="magenta")
         for idx, uid in enumerate(uids):
             try:
                 table.add_row(
@@ -1026,6 +1036,7 @@ def log_step(
                     str(round(sub_competition_weights[idx].item(), 4)),
                     str(step_log["uid_data"][str(uid)]["block"]),
                     str(step_log["uid_data"][str(uid)]["competition_id"]),
+                    str(step_log["uid_data"][str(uid)]["hf_url"]),
                 )
             except:
                 pass

From 39fe37e9f7aebffd51833ca63a7cb6d0de60f61d Mon Sep 17 00:00:00 2001
From: Sid
Date: Tue, 20 Aug 2024 16:26:04 -0700
Subject: [PATCH 04/10] Bump bittensor to 6.9.4.

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 4f48ddf..a96638d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-bittensor==6.9.3
+bittensor==6.9.4
 huggingface_hub
 matplotlib
 pydantic==1.10

From 4c4e5e9f089dce8e65c2a7811abecd1aaadcc327 Mon Sep 17 00:00:00 2001
From: Sid
Date: Tue, 20 Aug 2024 19:55:13 -0700
Subject: [PATCH 05/10] Use a config wandb project in the validator wandb logging.

---
 neurons/config.py    |  6 ++++++
 neurons/validator.py | 38 ++++++++++++++++++++++++--------------
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/neurons/config.py b/neurons/config.py
index 4e4742e..eeb58d1 100644
--- a/neurons/config.py
+++ b/neurons/config.py
@@ -20,6 +20,12 @@ def validator_config():
         action="store_false",
         help="Turn off wandb logging.",
     )
+    parser.add_argument(
+        "--wandb_project",
+        type=str,
+        default=constants.WANDB_PROJECT,
+        help="The wandb project to log to.",
+    )
     parser.add_argument(
         "--blocks_per_epoch",
         type=int,
diff --git a/neurons/validator.py b/neurons/validator.py
index 376ddbe..c20dfcc 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -278,7 +278,7 @@ def _new_wandb_run(self):
         name = "validator-" + str(self.uid) + "-" + run_id
         self.wandb_run = wandb.init(
             name=name,
-            project=constants.WANDB_PROJECT,
+            project=self.config.wandb_project,
             entity="macrocosmos",
             config={
                 "uid": self.uid,
@@ -725,21 +725,25 @@ async def run_step(self):
             pages_per_eval = constants.pages_per_eval_pack

         # If the option is set in the config, override
-        pages_per_eval = self.config.pages_per_eval if self.config.pages_per_eval is not None else pages_per_eval
+        pages_per_eval = (
+            self.config.pages_per_eval
+            if self.config.pages_per_eval is not None
+            else pages_per_eval
+        )

-        bt.logging.debug(f'Sample packing is set to: {pack_samples}.')
-        bt.logging.debug(f'Number of pages per evaluation step is: {pages_per_eval}')
+        bt.logging.debug(f"Sample packing is set to: {pack_samples}.")
+        bt.logging.debug(f"Number of pages per evaluation step is: {pages_per_eval}")

         dataloader = SubsetDataLoader(
             batch_size=constants.batch_size,
             sequence_length=competition.constraints.sequence_length,
-            num_pages= pages_per_eval,
+            num_pages=pages_per_eval,
             tokenizer=tokenizer,
-            pack_samples=pack_samples
-            )
+            pack_samples=pack_samples,
+        )

         batches = list(dataloader)
-        bt.logging.debug(f'Number of validation batches is {len(batches)}')
+        bt.logging.debug(f"Number of validation batches is {len(batches)}")

         # This is useful for logging to wandb
         pages = dataloader.get_page_names()
@@ -795,7 +799,7 @@ async def run_step(self):
                                 batches,
                                 self.config.device,
                                 tokenizer.eos_token_id,
-                                pack_samples
+                                pack_samples,
                             ),
                             ttl=400,
                             mode="spawn",
@@ -912,7 +916,9 @@ async def run_step(self):
         # If the model has any significant weight, prioritize by weight with greater weights being kept first.
         # Then for the unweighted models, prioritize by win_rate.
         # Use the competition weights from the tracker which also handles moving averages.
-        tracker_competition_weights = self.competition_tracker.get_competition_weights(competition.id)
+        tracker_competition_weights = self.competition_tracker.get_competition_weights(
+            competition.id
+        )
         model_prioritization = {
             uid: (
                 # Add 1 to ensure it is always greater than a win rate.
@@ -952,7 +958,6 @@ async def run_step(self):
             compute_loss_perf,
         )

-
         # Increment the number of completed run steps by 1
         self.run_step_count += 1

@@ -981,7 +986,9 @@ def log_step(
         }

         # The sub-competition weights
-        sub_competition_weights = torch.softmax(model_weights / constants.temperature, dim=0)
+        sub_competition_weights = torch.softmax(
+            model_weights / constants.temperature, dim=0
+        )

         for idx, uid in enumerate(uids):
             step_log["uid_data"][str(uid)] = {
@@ -1069,7 +1076,10 @@ def log_step(
                 str(uid): uid_data[str(uid)]["win_total"] for uid in uids
             },
             "weight_data": {str(uid): self.weights[uid].item() for uid in uids},
-            "norm_weight_data": {str(uid): sub_competition_weights[i].item() for i, uid in enumerate(uids)},
+            "norm_weight_data": {
+                str(uid): sub_competition_weights[i].item()
+                for i, uid in enumerate(uids)
+            },
             "competition_id": {
                 str(uid): uid_to_competition_id[uid] for uid in uids
@@ -1094,7 +1104,7 @@ def log_step(
                 step=self.last_wandb_step,
             )

-        self.last_wandb_step+=1
+        self.last_wandb_step += 1

     def _get_uids_to_competition_ids(
         self,

From 76e239e4c383d5301f235a3432017d7d84fac7b3 Mon Sep 17 00:00:00 2001
From: rusticluftig
Date: Wed, 21 Aug 2024 19:57:05 -0700
Subject: [PATCH 06/10] Bump transformers version

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 4f48ddf..cece18c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ rich
 safetensors
 torch
 numpy
-transformers==4.42.0
+transformers==4.44.1
 wandb
 datasets
 flash-attn

From 3427236d8bedb101ec89b99250ee78ea6ab1188f Mon Sep 17 00:00:00 2001
From: Alan Aboudib
Date: Fri, 23 Aug 2024 18:06:30 +0000
Subject: [PATCH 07/10] Bump version to 4.1.1 and remove StableLm and Phi3.

---
 constants/__init__.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index 302998c..e0d5ecb 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -14,11 +14,9 @@
     GPTNeoXForCausalLM,
     GPTJForCausalLM,
     PhiForCausalLM,
-    Phi3ForCausalLM,
     GemmaForCausalLM,
     Gemma2ForCausalLM,
     Qwen2ForCausalLM,
-    StableLmForCausalLM,
 )

 from taoverse.model.competition.data import (
@@ -35,7 +33,7 @@
 # ---------------------------------

 # Release
-__version__ = "4.1.0"
+__version__ = "4.1.1"

 # Validator schema version
 __validator_version__ = "3.0.0"
@@ -75,8 +73,6 @@
     FalconForCausalLM,
     GPTNeoXForCausalLM,
     GPTJForCausalLM,
-    StableLmForCausalLM,
-    Phi3ForCausalLM,
     Qwen2ForCausalLM,
 }
 ALLOWED_MODEL_TYPES_2 = {
@@ -88,8 +84,6 @@
     PhiForCausalLM,
     GemmaForCausalLM,
     Gemma2ForCausalLM,
-    StableLmForCausalLM,
-    Phi3ForCausalLM,
     Qwen2ForCausalLM,
 }

From b3f9e5f3c9635af17236eafda8338efc8170c50e Mon Sep 17 00:00:00 2001
From: Sid
Date: Fri, 23 Aug 2024 10:53:02 -0700
Subject: [PATCH 08/10] Fix table wrapping and bump taoverse.

---
 neurons/validator.py | 38 +++++++++++++++++++-------------------
 requirements.txt     |  2 +-
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/neurons/validator.py b/neurons/validator.py
index 923b60e..23ff066 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -707,8 +707,8 @@ async def run_step(self):
         # Keep track of which block this uid last updated their model.
         # Default to an infinite block if we can't retrieve the metadata for the miner.
         uid_to_block = defaultdict(lambda: math.inf)
-        # Keep track of the hugging face url for this uid.
-        uid_to_hf_url = defaultdict(lambda: "unknown")
+        # Keep track of the hugging face repo for this uid.
+        uid_to_hf = defaultdict(lambda: "unknown")

         bt.logging.trace(f"Current block: {cur_block}")

@@ -790,8 +790,8 @@ async def run_step(self):

                     # Update the block this uid last updated their model.
                     uid_to_block[uid_i] = model_i_metadata.block
-                    # Update the hf url for this model.
-                    uid_to_hf_url[uid_i] = model_utils.get_hf_url(model_i_metadata)
+                    # Update the hf repo for this model.
+                    uid_to_hf[uid_i] = model_utils.get_hf_repo_name(model_i_metadata)

                     # Get the model locally and evaluate its loss.
                     model_i = None
@@ -893,7 +893,7 @@ async def run_step(self):
             CompetitionId.B7_MODEL_LOWER_EPSILON,
             uids,
             uid_to_block,
-            uid_to_hf_url,
+            uid_to_hf,
             uids_to_competition_ids_epsilon_experiment,
             pages,
             model_weights_epsilon_experiment,
@@ -959,7 +959,7 @@ async def run_step(self):
             competition.id,
             uids,
             uid_to_block,
-            uid_to_hf_url,
+            uid_to_hf,
             self._get_uids_to_competition_ids(),
             pages,
             model_weights,
@@ -978,7 +978,7 @@ def log_step(
         competition_id: CompetitionId,
         uids: typing.List[int],
         uid_to_block: typing.Dict[int, int],
-        uid_to_hf_url: typing.Dict[int, str],
+        uid_to_hf: typing.Dict[int, str],
         uid_to_competition_id: typing.Dict[int, typing.Optional[int]],
         pages: typing.List[str],
         model_weights: typing.List[float],
@@ -1007,7 +1007,7 @@ def log_step(
             step_log["uid_data"][str(uid)] = {
                 "uid": uid,
                 "block": uid_to_block[uid],
-                "hf_url": uid_to_hf_url[uid],
+                "hf": uid_to_hf[uid],
                 "competition_id": uid_to_competition_id[uid],
                 "average_loss": sum(losses_per_uid[uid]) / len(losses_per_uid[uid]),
                 "win_rate": win_rate[uid],
@@ -1015,20 +1015,21 @@ def log_step(
                 "weight": self.weights[uid].item(),
                 "norm_weight": sub_competition_weights[idx].item(),
             }
-        table = Table(title="Step")
+        table = Table(title="Step", expand=True)
         table.add_column("uid", justify="right", style="cyan", no_wrap=True)
-        table.add_column("average_loss", style="magenta")
-        table.add_column("win_rate", style="magenta")
-        table.add_column("win_total", style="magenta")
-        table.add_column("weights", style="magenta")
-        table.add_column("competition_weights", style="magenta")
-        table.add_column("block", style="magenta")
-        table.add_column("competition", style="magenta")
-        table.add_column("hugging_face_url", style="magenta")
+        table.add_column("hf", style="magenta", overflow="fold")
+        table.add_column("average_loss", style="magenta", overflow="fold")
+        table.add_column("win_rate", style="magenta", overflow="fold")
+        table.add_column("win_total", style="magenta", overflow="fold")
+        table.add_column("total_weight", style="magenta", overflow="fold")
+        table.add_column("compe_weight", style="magenta", overflow="fold")
+        table.add_column("block", style="magenta", overflow="fold")
+        table.add_column("comp", style="magenta", overflow="fold")
         for idx, uid in enumerate(uids):
             try:
                 table.add_row(
                     str(uid),
+                    str(step_log["uid_data"][str(uid)]["hf"]),
                     str(round(step_log["uid_data"][str(uid)]["average_loss"], 4)),
                     str(round(step_log["uid_data"][str(uid)]["win_rate"], 4)),
                     str(step_log["uid_data"][str(uid)]["win_total"]),
@@ -1036,7 +1037,6 @@ def log_step(
                     str(round(sub_competition_weights[idx].item(), 4)),
                     str(step_log["uid_data"][str(uid)]["block"]),
                     str(step_log["uid_data"][str(uid)]["competition_id"]),
-                    str(step_log["uid_data"][str(uid)]["hf_url"]),
                 )
             except:
                 pass
@@ -1092,7 +1092,7 @@ def log_step(
                 str(uid): uid_data[str(uid)]["win_total"] for uid in uids
             },
             "weight_data": {str(uid): self.weights[uid].item() for uid in uids},
-            "norm_weight_data": {
+            "competition_weight_data": {
                 str(uid): sub_competition_weights[i].item()
                 for i, uid in enumerate(uids)
             },
diff --git a/requirements.txt b/requirements.txt
index 08fa165..dcfee97 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,4 @@ transformers==4.44.1
 wandb
 datasets
 flash-attn
-taoverse==1.0.1
+taoverse==1.0.2

From 2384ff60b0b7d0e3557edf2d3741d6a64d267dec Mon Sep 17 00:00:00 2001
From: Sid
Date: Fri, 23 Aug 2024 11:04:41 -0700
Subject: [PATCH 09/10] Add currently unused epsilon_func for taoverse 1.0.2.

---
 constants/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/constants/__init__.py b/constants/__init__.py
index e0d5ecb..d02abbe 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -24,6 +24,7 @@
     ModelConstraints,
     NormValidationConstraints,
 )
+from taoverse.model.competition.epsilon import FixedEpsilon
 from competitions.data import CompetitionId

 from typing import Dict, List, Tuple
@@ -103,6 +104,7 @@
         allowed_architectures=ALLOWED_MODEL_TYPES_1,
         tokenizer="distilgpt2",
         eval_block_delay=0,
+        epsilon_func=FixedEpsilon(0.005),
     ),
     CompetitionId.B7_MODEL: ModelConstraints(
         max_model_parameter_size=6_900_000_000,
@@ -115,6 +117,7 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=0,
+        epsilon_func=FixedEpsilon(0.005),
     ),
     CompetitionId.B3_MODEL: ModelConstraints(
         max_model_parameter_size=3_400_000_000,
@@ -127,6 +130,7 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=0,
+        epsilon_func=FixedEpsilon(0.005),
     ),
 }

From 288466b74454acabf9a75b9361c0970a6a81e64a Mon Sep 17 00:00:00 2001
From: Sid
Date: Fri, 23 Aug 2024 11:28:13 -0700
Subject: [PATCH 10/10] Bump to 4.1.2 and fix typo.

---
 constants/__init__.py | 2 +-
 neurons/validator.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index d02abbe..2838c56 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -34,7 +34,7 @@
 # ---------------------------------

 # Release
-__version__ = "4.1.1"
+__version__ = "4.1.2"

 # Validator schema version
 __validator_version__ = "3.0.0"
diff --git a/neurons/validator.py b/neurons/validator.py
index 23ff066..615eb9b 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -1022,7 +1022,7 @@ def log_step(
         table.add_column("win_rate", style="magenta", overflow="fold")
         table.add_column("win_total", style="magenta", overflow="fold")
         table.add_column("total_weight", style="magenta", overflow="fold")
-        table.add_column("compe_weight", style="magenta", overflow="fold")
+        table.add_column("comp_weight", style="magenta", overflow="fold")
         table.add_column("block", style="magenta", overflow="fold")
         table.add_column("comp", style="magenta", overflow="fold")
         for idx, uid in enumerate(uids):
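
A note for reviewers (not part of the patches themselves): the table change in PATCH 08 is easiest to see in isolation. The sketch below is a minimal standalone reproduction using only the `rich` package; the column subset and the example row values are hypothetical, chosen to show why `expand=True` plus `overflow="fold"` keeps long Hugging Face repo names readable instead of truncating the row.

```python
# Minimal sketch of the PATCH 08 table behavior (assumes `pip install rich`;
# all values below are made up for illustration).
from rich.console import Console
from rich.table import Table

# Before PATCH 08 the step table was Table(title="Step") with plain columns,
# so a long "hf" cell could be squeezed or cut off. With expand=True the table
# fills the terminal width, and overflow="fold" wraps overflowing cell text
# onto additional lines within its own column.
table = Table(title="Step", expand=True)
table.add_column("uid", justify="right", style="cyan", no_wrap=True)
table.add_column("hf", style="magenta", overflow="fold")
table.add_column("average_loss", style="magenta", overflow="fold")
table.add_column("comp_weight", style="magenta", overflow="fold")

# A hypothetical row: the long repo name folds inside the "hf" column.
table.add_row(
    "42",
    "some-org/an-extremely-long-model-repository-name-that-would-otherwise-truncate",
    "2.9134",
    "0.0871",
)

Console().print(table)
```

The series also renames the wandb field for the per-competition softmax weights from `norm_weight_data` to `competition_weight_data` (matching the `comp_weight` column), so any dashboard keyed on the old field name would need updating.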