From 2e9d6ddaacd6ac57392726947cfdd7cddd71e6e6 Mon Sep 17 00:00:00 2001
From: Sid
Date: Thu, 21 Mar 2024 20:27:31 -0700
Subject: [PATCH 1/6] Check generated outputs before calculating losses.

---
 pretrain/validation.py | 44 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/pretrain/validation.py b/pretrain/validation.py
index bb63c73e..9936a319 100644
--- a/pretrain/validation.py
+++ b/pretrain/validation.py
@@ -99,6 +99,50 @@ def compute_losses(
     model.to(device)
     model.eval()

+    # First do a sanity check that the model outputs look reasonable.
+    # Grab 100 tokens from the first two batches as 'prompts'. (1 x Seq Length tensors.)
+    prompt_length = 100
+    falcon_token_inputs_1 = batches[0][:, :prompt_length]
+    falcon_token_inputs_2 = batches[1][:, :prompt_length]
+
+    # Generate 30 tokens of output from the model for each prompt.
+    output_length = 30
+    # Only take the last 30 tokens since otherwise we also get the prompt ids.
+    generate_id1s = model.generate(
+        falcon_token_inputs_1.cuda(),
+        min_new_tokens=output_length,
+        max_new_tokens=output_length,
+    )[:, -output_length:]
+    generate_id2s = model.generate(
+        falcon_token_inputs_2.cuda(),
+        min_new_tokens=output_length,
+        max_new_tokens=output_length,
+    )[:, -output_length:]
+
+    # Check if too many of the generated ids are the same between the two outputs.
+    if torch.sum(torch.eq(generate_id1s, generate_id2s)).item() >= output_length / 3:
+        bt.logging.info(
+            f"Model with config {model.config} had too much overlap between generated outputs."
+        )
+        return [math.inf for _ in batches]
+
+    # Check if internally either response is too repetitive.
+    for tensor in [generate_id1s, generate_id2s]:
+        # Find unique elements and their counts
+        _, counts = torch.unique(tensor, return_counts=True)
+        # Find the index of the maximum count
+        max_count_index = torch.argmax(counts)
+        # Extract the count of the most common element
+        most_common_count = counts[max_count_index].item()
+
+        if most_common_count > output_length / 3:
+            bt.logging.info(
+                f"Model with config {model.config} had too much repetition in generated output."
+            )
+            return [math.inf for _ in batches]
+
+    # Everything looks good! Continue to computing actual losses.
+
     # Iterate over each page and corresponding batches
     losses = []
     for batch in batches:
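
The overlap and repetition heuristics this patch introduces are plain tensor operations, so they are easy to try in isolation. Below is a rough, self-contained sketch of both checks, with toy tensors standing in for real generated token ids (all values are illustrative, not from the subnet):

    import torch

    output_length = 30
    # One degenerate "generation" (a single repeated id) and one varied one.
    generate_id1s = torch.full((1, output_length), 42)
    generate_id2s = torch.randint(0, 50_000, (1, output_length))

    # Overlap check: count positions where both outputs produced the same id.
    overlap = torch.sum(torch.eq(generate_id1s, generate_id2s)).item()
    print(overlap >= output_length / 3)   # True would flag the model.

    # Repetition check: count of the most common id within a single output.
    _, counts = torch.unique(generate_id1s, return_counts=True)
    most_common_count = counts[torch.argmax(counts)].item()
    print(most_common_count > output_length / 3)  # True here: 30 > 10.
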
From 82e74a3f551beddb94ab6ae9d24a8ccdbbdc4370 Mon Sep 17 00:00:00 2001
From: Sid
Date: Thu, 21 Mar 2024 20:36:16 -0700
Subject: [PATCH 2/6] Send inputs to the same device as the model.

---
 pretrain/validation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pretrain/validation.py b/pretrain/validation.py
index 9936a319..acb9eb2d 100644
--- a/pretrain/validation.py
+++ b/pretrain/validation.py
@@ -102,19 +102,19 @@ def compute_losses(
     # First do a sanity check that the model outputs look reasonable.
     # Grab 100 tokens from the first two batches as 'prompts'. (1 x Seq Length tensors.)
     prompt_length = 100
-    falcon_token_inputs_1 = batches[0][:, :prompt_length]
-    falcon_token_inputs_2 = batches[1][:, :prompt_length]
+    falcon_token_inputs_1 = (batches[0][:, :prompt_length]).to(device)
+    falcon_token_inputs_2 = (batches[1][:, :prompt_length]).to(device)

     # Generate 30 tokens of output from the model for each prompt.
     output_length = 30
     # Only take the last 30 tokens since otherwise we also get the prompt ids.
     generate_id1s = model.generate(
-        falcon_token_inputs_1.cuda(),
+        falcon_token_inputs_1,
         min_new_tokens=output_length,
         max_new_tokens=output_length,
     )[:, -output_length:]
     generate_id2s = model.generate(
-        falcon_token_inputs_2.cuda(),
+        falcon_token_inputs_2,
         min_new_tokens=output_length,
         max_new_tokens=output_length,
     )[:, -output_length:]
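
The fix above matters because .cuda() assumes a GPU is present, while .to(device) follows whatever device compute_losses was asked to use. A minimal sketch of the difference, using a stand-in torch.nn.Linear rather than the subnet's real models:

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = torch.nn.Linear(8, 8).to(device)

    batch = torch.randn(1, 8)       # tensors are created on the CPU by default
    out = model(batch.to(device))   # works on CPU-only and GPU validators alike
    # out = model(batch.cuda())     # would raise on a machine without CUDA
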
From 7eb4b4e0fbc816ef6c900d4bfa6c978b8bc4b3bd Mon Sep 17 00:00:00 2001
From: Sid
Date: Thu, 21 Mar 2024 20:54:50 -0700
Subject: [PATCH 3/6] Refactor the output check into a helper function.

---
 pretrain/validation.py | 68 ++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/pretrain/validation.py b/pretrain/validation.py
index acb9eb2d..0b30af8b 100644
--- a/pretrain/validation.py
+++ b/pretrain/validation.py
@@ -82,39 +82,29 @@ def compute_wins(
     return wins, win_rate


-def compute_losses(
-    model, batches: typing.List[torch.Tensor], device: str
-) -> typing.List[float]:
-    """
-    Computes the losses for a given model on provided batches.
+def check_for_reasonable_output(
+    model, input1: torch.Tensor, input2: torch.Tensor
+) -> bool:
+    """Checks that a model generates reasonable outputs for two given inputs.

-    Parameters:
-        model (torch.nn.Module): The model for which losses are to be computed.
-        batches (dict): A list of batches.
-        device (str): The device to use for computation (e.g., 'cpu', 'gpu').
+    Args:
+        model (torch.nn.Module): The model for which outputs are to be checked. Already loaded to device.
+        input1 (torch.Tensor): Tokenized input1 to check. Already loaded to device.
+        input2 (torch.Tensor): Tokenized input2 to check. Already loaded to device.

     Returns:
-        dict: A dictionary with page indices as keys and lists of loss values as values.
+        bool: True if the model generates reasonable outputs, False otherwise.
     """
-    model.to(device)
-    model.eval()
-
-    # First do a sanity check that the model outputs look reasonable.
-    # Grab 100 tokens from the first two batches as 'prompts'. (1 x Seq Length tensors.)
-    prompt_length = 100
-    falcon_token_inputs_1 = (batches[0][:, :prompt_length]).to(device)
-    falcon_token_inputs_2 = (batches[1][:, :prompt_length]).to(device)
-
     # Generate 30 tokens of output from the model for each prompt.
     output_length = 30
     # Only take the last 30 tokens since otherwise we also get the prompt ids.
     generate_id1s = model.generate(
-        falcon_token_inputs_1,
+        input1,
         min_new_tokens=output_length,
         max_new_tokens=output_length,
     )[:, -output_length:]
     generate_id2s = model.generate(
-        falcon_token_inputs_2,
+        input2,
         min_new_tokens=output_length,
         max_new_tokens=output_length,
     )[:, -output_length:]
@@ -124,7 +114,7 @@ def compute_losses(
         bt.logging.info(
             f"Model with config {model.config} had too much overlap between generated outputs."
         )
-        return [math.inf for _ in batches]
+        return False

     # Check if internally either response is too repetitive.
     for tensor in [generate_id1s, generate_id2s]:
@@ -139,7 +129,39 @@ def compute_losses(
         bt.logging.info(
             f"Model with config {model.config} had too much repetition in generated output."
         )
-        return [math.inf for _ in batches]
+        return False
+
+    # Passed all the checks, return True.
+    return True
+
+
+def compute_losses(
+    model, batches: typing.List[torch.Tensor], device: str
+) -> typing.List[float]:
+    """
+    Computes the losses for a given model on provided batches.
+
+    Parameters:
+        model (torch.nn.Module): The model for which losses are to be computed.
+        batches (typing.List[torch.Tensor]): A list of token batches.
+        device (str): The device to use for computation (e.g., 'cpu', 'cuda').
+
+    Returns:
+        typing.List[float]: A list of loss values, one per batch.
+    """
+    model.to(device)
+    model.eval()
+
+    # First check that the model generates reasonable looking outputs.
+    # Grab 100 tokens from the first two batches as 'prompts'. (1 x Seq Length tensors.)
+    prompt_length = 100
+    falcon_token_inputs_1 = (batches[0][:, :prompt_length]).to(device)
+    falcon_token_inputs_2 = (batches[1][:, :prompt_length]).to(device)
+
+    if not check_for_reasonable_output(
+        model, falcon_token_inputs_1, falcon_token_inputs_2
+    ):
+        return [math.inf for _ in batches]

     # Everything looks good! Continue to computing actual losses.


From 1177610e07ac36a9613fc968a0acd197b66025b2 Mon Sep 17 00:00:00 2001
From: Sid
Date: Thu, 21 Mar 2024 20:59:10 -0700
Subject: [PATCH 4/6] Bump spec version to force reload of models.

---
 constants/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index 5aaaabb4..e38d2f5a 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -13,7 +13,7 @@
 # Project Constants.
 # ---------------------------------

-__version__ = "2.2.1"
+__version__ = "2.2.2"
 version_split = __version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))

From 6160d49fa611f8b7706184ffda3a3f95e1460680 Mon Sep 17 00:00:00 2001
From: Sid
Date: Thu, 21 Mar 2024 21:31:13 -0700
Subject: [PATCH 5/6] Pass tokenizer eos token id to remove warning message.

---
 pretrain/validation.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pretrain/validation.py b/pretrain/validation.py
index 0b30af8b..cfe0cc64 100644
--- a/pretrain/validation.py
+++ b/pretrain/validation.py
@@ -24,6 +24,7 @@
 import constants
 import traceback
 import bittensor as bt
+import pretrain as pt


 def iswin(loss_i, loss_j, block_i, block_j):
@@ -97,16 +98,19 @@ def check_for_reasonable_output(
     """
     # Generate 30 tokens of output from the model for each prompt.
     output_length = 30
+    tokenizer = pt.model.get_tokenizer()
     # Only take the last 30 tokens since otherwise we also get the prompt ids.
     generate_id1s = model.generate(
         input1,
         min_new_tokens=output_length,
         max_new_tokens=output_length,
+        pad_token_id=tokenizer.eos_token_id,
     )[:, -output_length:]
     generate_id2s = model.generate(
         input2,
         min_new_tokens=output_length,
         max_new_tokens=output_length,
+        pad_token_id=tokenizer.eos_token_id,
     )[:, -output_length:]

     # Check if too many of the generated ids are the same between the two outputs.
@@ -136,7 +140,9 @@


 def compute_losses(
-    model, batches: typing.List[torch.Tensor], device: str
+    model,
+    batches: typing.List[torch.Tensor],
+    device: str,
 ) -> typing.List[float]:
     """
     Computes the losses for a given model on provided batches.
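
The warning removed by PATCH 5/6 is the one transformers prints on every generate() call when a model has no pad token configured (roughly: "Setting pad_token_id to eos_token_id for open-end generation."). A minimal sketch of the same fix outside the subnet; gpt2 is used here purely as a stand-in model, since the subnet obtains its tokenizer via pt.model.get_tokenizer():

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in tokenizer
    model = AutoModelForCausalLM.from_pretrained("gpt2")  # stand-in model

    input_ids = tokenizer("The quick brown fox", return_tensors="pt").input_ids
    ids = model.generate(
        input_ids,
        min_new_tokens=30,
        max_new_tokens=30,
        pad_token_id=tokenizer.eos_token_id,  # silences the per-call warning
    )
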
From d80f965c3eb7c59db04ac539191e6674afa49a9d Mon Sep 17 00:00:00 2001
From: Sid
Date: Thu, 21 Mar 2024 21:38:40 -0700
Subject: [PATCH 6/6] Start iterator at 200 for a fresh start.

---
 utilities/miner_iterator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utilities/miner_iterator.py b/utilities/miner_iterator.py
index 12b8a27f..0a17cdf2 100644
--- a/utilities/miner_iterator.py
+++ b/utilities/miner_iterator.py
@@ -17,7 +17,8 @@ def __init__(self, miner_uids: List[int]):
         self.miner_uids = sorted(copy.deepcopy(miner_uids))
         # Start the index at a random position. This helps ensure that miners with high UIDs aren't penalized if
         # the validator restarts frequently.
-        self.index = random.randint(0, len(self.miner_uids) - 1)
+        # Temporarily hard code to start at 200 to more quickly restart on the relevant models.
+        self.index = 200
         self.lock = threading.Lock()

     def __iter__(self):
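
One caveat with the hard-coded start: if __next__ indexes miner_uids directly, an index of 200 assumes at least 201 registered UIDs. The rest of MinerIterator is not shown in this diff, so the sketch below is only an assumption about how such a cyclic iterator typically advances; the modulo wrap is what makes any start value safe:

    import threading
    from typing import List

    class CyclicMinerIterator:
        """Illustrative stand-in for MinerIterator; not the subnet's code."""

        def __init__(self, miner_uids: List[int], start_index: int = 200):
            self.miner_uids = sorted(miner_uids)
            # Wrap the start so short UID lists cannot raise an IndexError.
            self.index = start_index % len(self.miner_uids)
            self.lock = threading.Lock()

        def __iter__(self):
            return self

        def __next__(self) -> int:
            with self.lock:
                uid = self.miner_uids[self.index]
                self.index = (self.index + 1) % len(self.miner_uids)
                return uid
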