Merge pull request #158 from macrocosm-os/dev

Release 4.1.2
macrocosm-os · Aug 23, 2024 · 72e772c · 72e772c
2 parents cb888b4 + f10dd01
commit 72e772c
Show file tree

Hide file tree

Showing 4 changed files with 64 additions and 34 deletions.
diff --git a/constants/__init__.py b/constants/__init__.py
@@ -14,18 +14,17 @@
     GPTNeoXForCausalLM,
     GPTJForCausalLM,
     PhiForCausalLM,
-    Phi3ForCausalLM,
     GemmaForCausalLM,
     Gemma2ForCausalLM,
     Qwen2ForCausalLM,
-    StableLmForCausalLM,
 )
 
 from taoverse.model.competition.data import (
     Competition,
     ModelConstraints,
     NormValidationConstraints,
 )
+from taoverse.model.competition.epsilon import FixedEpsilon
 from competitions.data import CompetitionId
 
 from typing import Dict, List, Tuple
@@ -35,7 +34,7 @@
 # ---------------------------------
 
 # Release
-__version__ = "4.1.0"
+__version__ = "4.1.2"
 
 # Validator schema version
 __validator_version__ = "3.0.0"
@@ -75,8 +74,6 @@
     FalconForCausalLM,
     GPTNeoXForCausalLM,
     GPTJForCausalLM,
-    StableLmForCausalLM,
-    Phi3ForCausalLM,
     Qwen2ForCausalLM,
 }
 ALLOWED_MODEL_TYPES_2 = {
@@ -88,8 +85,6 @@
     PhiForCausalLM,
     GemmaForCausalLM,
     Gemma2ForCausalLM,
-    StableLmForCausalLM,
-    Phi3ForCausalLM,
     Qwen2ForCausalLM,
 }
 
@@ -109,6 +104,7 @@
         allowed_architectures=ALLOWED_MODEL_TYPES_1,
         tokenizer="distilgpt2",
         eval_block_delay=0,
+        epsilon_func=FixedEpsilon(0.005),
     ),
     CompetitionId.B7_MODEL: ModelConstraints(
         max_model_parameter_size=6_900_000_000,
@@ -121,6 +117,7 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=0,
+        epsilon_func=FixedEpsilon(0.005),
     ),
     CompetitionId.B3_MODEL: ModelConstraints(
         max_model_parameter_size=3_400_000_000,
@@ -133,6 +130,7 @@
             "attn_implementation": "flash_attention_2",
         },
         eval_block_delay=0,
+        epsilon_func=FixedEpsilon(0.005),
     ),
 }
 

diff --git a/neurons/config.py b/neurons/config.py
@@ -20,6 +20,12 @@ def validator_config():
         action="store_false",
         help="Turn off wandb logging.",
     )
+    parser.add_argument(
+        "--wandb_project",
+        type=str,
+        default=constants.WANDB_PROJECT,
+        help="The wandb project to log to.",
+    )
     parser.add_argument(
         "--blocks_per_epoch",
         type=int,

diff --git a/neurons/validator.py b/neurons/validator.py
@@ -33,6 +33,7 @@
 import constants
 from taoverse.metagraph import utils as metagraph_utils
 from taoverse.metagraph.metagraph_syncer import MetagraphSyncer
+from taoverse.model import utils as model_utils
 from taoverse.model.competition import utils as competition_utils
 from taoverse.model.competition.competition_tracker import CompetitionTracker
 from taoverse.model.competition.data import Competition
@@ -278,7 +279,7 @@ def _new_wandb_run(self):
         name = "validator-" + str(self.uid) + "-" + run_id
         self.wandb_run = wandb.init(
             name=name,
-            project=constants.WANDB_PROJECT,
+            project=self.config.wandb_project,
             entity="macrocosmos",
             config={
                 "uid": self.uid,
@@ -702,9 +703,12 @@ async def run_step(self):
                 time.sleep(300)
             return
 
+        # TODO: Consider condensing the per-uid maps below, together with the competition id, into a single uid-to-metadata map.
         # Keep track of which block this uid last updated their model.
         # Default to an infinite block if we can't retrieve the metadata for the miner.
         uid_to_block = defaultdict(lambda: math.inf)
+        # Keep track of the Hugging Face repo name for each uid.
+        uid_to_hf = defaultdict(lambda: "unknown")
 
         bt.logging.trace(f"Current block: {cur_block}")
 
@@ -725,21 +729,25 @@ async def run_step(self):
             pages_per_eval = constants.pages_per_eval_pack
 
         # If the option is set in the config, override
-        pages_per_eval = self.config.pages_per_eval if self.config.pages_per_eval is not None else pages_per_eval
+        pages_per_eval = (
+            self.config.pages_per_eval
+            if self.config.pages_per_eval is not None
+            else pages_per_eval
+        )
 
-        bt.logging.debug(f'Sample packing is set to: {pack_samples}.')
-        bt.logging.debug(f'Number of pages per evaluation step is: {pages_per_eval}')
+        bt.logging.debug(f"Sample packing is set to: {pack_samples}.")
+        bt.logging.debug(f"Number of pages per evaluation step is: {pages_per_eval}")
 
         dataloader = SubsetDataLoader(
             batch_size=constants.batch_size,
             sequence_length=competition.constraints.sequence_length,
-            num_pages= pages_per_eval,
+            num_pages=pages_per_eval,
             tokenizer=tokenizer,
-            pack_samples=pack_samples
-            )
+            pack_samples=pack_samples,
+        )
 
         batches = list(dataloader)
-        bt.logging.debug(f'Number of validation batches is {len(batches)}')
+        bt.logging.debug(f"Number of validation batches is {len(batches)}")
 
         # This is useful for logging to wandb
         pages = dataloader.get_page_names()
@@ -758,11 +766,11 @@ async def run_step(self):
         compute_loss_perf = PerfMonitor("Eval: Compute loss")
 
         for uid_i in uids:
-            bt.logging.trace(f"Computing model losses for uid:{uid_i}.")
-
             # This variable should be overwritten below if the model has metadata.
             losses: typing.List[float] = [math.inf for _ in range(len(batches))]
 
+            bt.logging.trace(f"Getting metadata for uid: {uid_i}.")
+
             # Check that the model is in the tracker.
             with self.metagraph_lock:
                 hotkey = self.metagraph.hotkeys[uid_i]
@@ -776,8 +784,14 @@ async def run_step(self):
                 and model_i_metadata.id.competition_id == competition.id
             ):
                 try:
+                    bt.logging.info(
+                        f"Evaluating uid: {uid_i} / hotkey: {hotkey} with metadata: {model_i_metadata} and hf_url: {model_utils.get_hf_url(model_i_metadata)}."
+                    )
+
                     # Update the block this uid last updated their model.
                     uid_to_block[uid_i] = model_i_metadata.block
+                    # Record the Hugging Face repo name for this uid's model.
+                    uid_to_hf[uid_i] = model_utils.get_hf_repo_name(model_i_metadata)
 
                     # Get the model locally and evaluate its loss.
                     model_i = None
@@ -795,7 +809,7 @@ async def run_step(self):
                                 batches,
                                 self.config.device,
                                 tokenizer.eos_token_id,
-                                pack_samples
+                                pack_samples,
                             ),
                             ttl=400,
                             mode="spawn",
@@ -879,6 +893,7 @@ async def run_step(self):
                 CompetitionId.B7_MODEL_LOWER_EPSILON,
                 uids,
                 uid_to_block,
+                uid_to_hf,
                 uids_to_competition_ids_epsilon_experiment,
                 pages,
                 model_weights_epsilon_experiment,
@@ -912,7 +927,9 @@ async def run_step(self):
         # If the model has any significant weight, prioritize by weight with greater weights being kept first.
         # Then for the unweighted models, prioritize by win_rate.
         # Use the competition weights from the tracker which also handles moving averages.
-        tracker_competition_weights = self.competition_tracker.get_competition_weights(competition.id)
+        tracker_competition_weights = self.competition_tracker.get_competition_weights(
+            competition.id
+        )
         model_prioritization = {
             uid: (
                 # Add 1 to ensure it is always greater than a win rate.
@@ -942,6 +959,7 @@ async def run_step(self):
             competition.id,
             uids,
             uid_to_block,
+            uid_to_hf,
             self._get_uids_to_competition_ids(),
             pages,
             model_weights,
@@ -952,7 +970,6 @@ async def run_step(self):
             compute_loss_perf,
         )
 
-
         # Increment the number of completed run steps by 1
         self.run_step_count += 1
 
@@ -961,6 +978,7 @@ def log_step(
         competition_id: CompetitionId,
         uids: typing.List[int],
         uid_to_block: typing.Dict[int, int],
+        uid_to_hf: typing.Dict[int, str],
         uid_to_competition_id: typing.Dict[int, typing.Optional[int]],
         pages: typing.List[str],
         model_weights: typing.List[float],
@@ -981,32 +999,37 @@ def log_step(
         }
 
         # The sub-competition weights
-        sub_competition_weights = torch.softmax(model_weights / constants.temperature, dim=0)
+        sub_competition_weights = torch.softmax(
+            model_weights / constants.temperature, dim=0
+        )
 
         for idx, uid in enumerate(uids):
             step_log["uid_data"][str(uid)] = {
                 "uid": uid,
                 "block": uid_to_block[uid],
+                "hf": uid_to_hf[uid],
                 "competition_id": uid_to_competition_id[uid],
                 "average_loss": sum(losses_per_uid[uid]) / len(losses_per_uid[uid]),
                 "win_rate": win_rate[uid],
                 "win_total": wins[uid],
                 "weight": self.weights[uid].item(),
                 "norm_weight": sub_competition_weights[idx].item(),
             }
-        table = Table(title="Step")
+        table = Table(title="Step", expand=True)
         table.add_column("uid", justify="right", style="cyan", no_wrap=True)
-        table.add_column("average_loss", style="magenta")
-        table.add_column("win_rate", style="magenta")
-        table.add_column("win_total", style="magenta")
-        table.add_column("weights", style="magenta")
-        table.add_column("competition_weights", style="magenta")
-        table.add_column("block", style="magenta")
-        table.add_column("competition", style="magenta")
+        table.add_column("hf", style="magenta", overflow="fold")
+        table.add_column("average_loss", style="magenta", overflow="fold")
+        table.add_column("win_rate", style="magenta", overflow="fold")
+        table.add_column("win_total", style="magenta", overflow="fold")
+        table.add_column("total_weight", style="magenta", overflow="fold")
+        table.add_column("comp_weight", style="magenta", overflow="fold")
+        table.add_column("block", style="magenta", overflow="fold")
+        table.add_column("comp", style="magenta", overflow="fold")
         for idx, uid in enumerate(uids):
             try:
                 table.add_row(
                     str(uid),
+                    str(step_log["uid_data"][str(uid)]["hf"]),
                     str(round(step_log["uid_data"][str(uid)]["average_loss"], 4)),
                     str(round(step_log["uid_data"][str(uid)]["win_rate"], 4)),
                     str(step_log["uid_data"][str(uid)]["win_total"]),
@@ -1069,7 +1092,10 @@ def log_step(
                     str(uid): uid_data[str(uid)]["win_total"] for uid in uids
                 },
                 "weight_data": {str(uid): self.weights[uid].item() for uid in uids},
-                "norm_weight_data": {str(uid): sub_competition_weights[i].item() for i, uid in enumerate(uids)},
+                "competition_weight_data": {
+                    str(uid): sub_competition_weights[i].item()
+                    for i, uid in enumerate(uids)
+                },
                 "competition_id": {
                     str(uid): uid_to_competition_id[uid]
                     for uid in uids
@@ -1094,7 +1120,7 @@ def log_step(
                 step=self.last_wandb_step,
             )
 
-            self.last_wandb_step+=1
+            self.last_wandb_step += 1
 
     def _get_uids_to_competition_ids(
         self,

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-bittensor==6.9.3
+bittensor==6.9.4
 huggingface_hub
 matplotlib
 pydantic==1.10
@@ -7,8 +7,8 @@ rich
 safetensors
 torch
 numpy
-transformers==4.42.0
+transformers==4.44.1
 wandb
 datasets
 flash-attn
-taoverse==1.0.1
+taoverse==1.0.2