From 511ed18d06ed3f296631a358260b6000ecdfa383 Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 27 Dec 2024 10:15:20 +0000 Subject: [PATCH 01/38] first version of refactor of variable scaling --- training/src/anemoi/training/data/scaling.py | 79 -------- .../src/anemoi/training/train/forecaster.py | 55 ++---- training/src/anemoi/training/train/scaling.py | 183 ++++++++++++++++++ 3 files changed, 197 insertions(+), 120 deletions(-) delete mode 100644 training/src/anemoi/training/data/scaling.py create mode 100644 training/src/anemoi/training/train/scaling.py diff --git a/training/src/anemoi/training/data/scaling.py b/training/src/anemoi/training/data/scaling.py deleted file mode 100644 index 83419a88..00000000 --- a/training/src/anemoi/training/data/scaling.py +++ /dev/null @@ -1,79 +0,0 @@ -# (C) Copyright 2024 Anemoi contributors. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - - -import logging -from abc import ABC -from abc import abstractmethod - -import numpy as np - -LOGGER = logging.getLogger(__name__) - - -class BasePressureLevelScaler(ABC): - """Configurable method converting pressure level of variable to PTL scaling. - - Scaling variables depending on pressure levels (50 to 1000). - """ - - def __init__(self, slope: float = 1.0 / 1000, minimum: float = 0.0) -> None: - """Initialise Scaler with slope and minimum. - - Parameters - ---------- - slope : float - Slope of the scaling function. - minimum : float - Minimum value of the scaling function. - - """ - self.slope = slope - self.minimum = minimum - - @abstractmethod - def scaler(self, plev: float) -> np.ndarray: ... - - -class LinearPressureLevelScaler(BasePressureLevelScaler): - """Linear with slope self.slope, yaxis shift by self.minimum.""" - - def scaler(self, plev: float) -> np.ndarray: - return plev * self.slope + self.minimum - - -class ReluPressureLevelScaler(BasePressureLevelScaler): - """Linear above self.minimum, taking constant value self.minimum below.""" - - def scaler(self, plev: float) -> np.ndarray: - return max(self.minimum, plev * self.slope) - - -class PolynomialPressureLevelScaler(BasePressureLevelScaler): - """Polynomial scaling, (slope * plev)^2, yaxis shift by self.minimum.""" - - def scaler(self, plev: float) -> np.ndarray: - return (self.slope * plev) ** 2 + self.minimum - - -class NoPressureLevelScaler(BasePressureLevelScaler): - """Constant scaling by 1.0.""" - - def __init__(self, slope: float = 0.0, minimum: float = 1.0) -> None: - """Initialise Scaler with constant scaling of 1.""" - assert ( - minimum == 1.0 and slope == 0 - ), "self.minimum must be 1.0 and self.slope 0.0 for no scaling to fit with definition of linear function." 
- super().__init__(slope=0.0, minimum=1.0) - - @staticmethod - def scaler(plev: float) -> np.ndarray: - del plev # unused - # no scaling, always return 1.0 - return 1.0 diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index e88db201..18f6a4bc 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -15,7 +15,6 @@ from typing import Optional from typing import Union -import numpy as np import pytorch_lightning as pl import torch from hydra.utils import instantiate @@ -31,6 +30,7 @@ from anemoi.models.interface import AnemoiModelInterface from anemoi.training.losses.utils import grad_scaler from anemoi.training.losses.weightedloss import BaseWeightedLoss +from anemoi.training.train.scaling import GeneralVariableLossScaler from anemoi.training.utils.jsonify import map_config_to_primitives from anemoi.training.utils.masks import Boolean1DMask from anemoi.training.utils.masks import NoOutputMask @@ -98,7 +98,18 @@ def __init__( self.logger_enabled = config.diagnostics.log.wandb.enabled or config.diagnostics.log.mlflow.enabled - variable_scaling = self.get_variable_scaling(config, data_indices) + variable_scaling = GeneralVariableLossScaler( + config.training.variable_loss_scaling, + data_indices, + ).get_variable_scaling() + + # Instantiate the pressure level scaling class with the training configuration + pressurelevelscaler = instantiate( + config.training.pressure_level_scaler, + scaling_config=config.training.variable_loss_scaling, + data_indices=data_indices, + ) + pressure_level_scaling = pressurelevelscaler.get_variable_scaling() self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices) @@ -117,6 +128,7 @@ def __init__( # Filled after first application of preprocessor. dimension=[-2, -1] (latlon, n_outputs). 
self.scalars = {
             "variable": (-1, variable_scaling),
+            "variable_pressure_level": (-1, pressure_level_scaling),
             "loss_weights_mask": ((-2, -1), torch.ones((1, 1))),
             "limited_area_mask": (2, limited_area_mask),
         }
@@ -299,45 +311,6 @@ def get_val_metric_ranges(config: DictConfig, data_indices: IndexCollection) ->

         return metric_ranges, metric_ranges_validation

-    @staticmethod
-    def get_variable_scaling(
-        config: DictConfig,
-        data_indices: IndexCollection,
-    ) -> torch.Tensor:
-        variable_loss_scaling = (
-            np.ones((len(data_indices.internal_data.output.full),), dtype=np.float32)
-            * config.training.variable_loss_scaling.default
-        )
-        pressure_level = instantiate(config.training.pressure_level_scaler)
-
-        LOGGER.info(
-            "Pressure level scaling: use scaler %s with slope %.4f and minimum %.2f",
-            type(pressure_level).__name__,
-            pressure_level.slope,
-            pressure_level.minimum,
-        )
-
-        for key, idx in data_indices.internal_model.output.name_to_index.items():
-            split = key.split("_")
-            if len(split) > 1 and split[-1].isdigit():
-                # Apply pressure level scaling
-                if split[0] in config.training.variable_loss_scaling.pl:
-                    variable_loss_scaling[idx] = config.training.variable_loss_scaling.pl[
-                        split[0]
-                    ] * pressure_level.scaler(
-                        int(split[-1]),
-                    )
-                else:
-                    LOGGER.debug("Parameter %s was not scaled.", key)
-            else:
-                # Apply surface variable scaling
-                if key in config.training.variable_loss_scaling.sfc:
-                    variable_loss_scaling[idx] = config.training.variable_loss_scaling.sfc[key]
-                else:
-                    LOGGER.debug("Parameter %s was not scaled.", key)
-
-        return torch.from_numpy(variable_loss_scaling)
-
     @staticmethod
     def get_node_weights(config: DictConfig, graph_data: HeteroData) -> torch.Tensor:
         node_weighting = instantiate(config.training.node_loss_weights)
diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py
new file mode 100644
index 00000000..f4d50b91
--- /dev/null
+++ b/training/src/anemoi/training/train/scaling.py
@@ -0,0 +1,183 @@
+# (C) Copyright 2024 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+import logging
+from abc import ABC
+from abc import abstractmethod
+
+import numpy as np
+from omegaconf import DictConfig
+
+from anemoi.models.data_indices.collection import IndexCollection
+
+LOGGER = logging.getLogger(__name__)
+
+
+class BaseVariableLossScaler(ABC):
+    """Configurable method converting variable to loss scaling."""
+
+    def __init__(self, scaling_config: DictConfig, data_indices: IndexCollection) -> None:
+        """Initialise Scaler.
+
+        Parameters
+        ----------
+        scaling_config : DictConfig
+            Configuration for variable loss scaling.
+        data_indices : IndexCollection
+            Collection of data indices.
+
+        """
+        self.scaling_config = scaling_config
+        self.data_indices = data_indices
+        self.variable_groups = self.scaling_config.variable_groups
+        # invert the mapping: variable name -> group name
+        self.group_variables = {}
+        for group, variables in self.variable_groups.items():
+            if isinstance(variables, str):
+                variables = [variables]
+            for variable in variables:
+                self.group_variables[variable] = group
+        self.default_group = self.scaling_config.variable_groups.default
+
+    @abstractmethod
+    def get_variable_scaling(self) -> np.ndarray: ...
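+    # A rough illustration of the grouping (values borrowed from the default config,
+    # shown here only as an example): with
+    #   variable_groups = {"default": "sfc", "pl": ["q", "t", "u", "v", "w", "z"]}
+    # self.group_variables becomes the inverted mapping {"q": "pl", "t": "pl", ...}
+    # and self.default_group is "sfc", so any variable not listed falls back to "sfc".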
+ + def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: + """Get the group of a variable. + + Parameters + ---------- + variable_name : str + Name of the variable. + + Returns + ------- + str + Group of the variable. + + """ + split = variable_name.split("_") + variable_level = None + if len(split) > 1 and split[-1].isdigit(): + variable_level = int(split[-1]) + variable_name = variable_name[: -len(split[-1]) - 1] + if variable_name in self.group_variables: + return self.group_variables[variable_name], variable_name, variable_level + return self.default_group, variable_name, variable_level + + +class GeneralVariableLossScaler(BaseVariableLossScaler): + """General scaling of variables to loss scaling.""" + + def get_variable_scaling(self) -> np.ndarray: + variable_loss_scaling = ( + np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32) * self.scaling_config.default + ) + + for variable_name, idx in self.data_indices.internal_model.output.name_to_index.items(): + _, variable_ref, _ = self.get_variable_group(variable_name) + # Apply variable scaling + variable_loss_scaling[idx] = self.scaling_config.get(variable_name, 1.0) * self.scaling_config.get( + variable_ref, + 1.0, + ) + + return variable_loss_scaling + + +class BaseVariableLevelScaler(BaseVariableLossScaler): + """Configurable method converting variable level to scaling.""" + + def __init__( + self, + scaling_config: DictConfig, + data_indices: IndexCollection, + group: str, + y_intercept: float, + slope: float, + ) -> None: + """Initialise variable level scaler. + + Parameters + ---------- + scaling_config : DictConfig + Configuration for variable loss scaling. + data_indices : IndexCollection + Collection of data indices. + group : str + Group of variables to scale. + y_intercept : float + Y-axis shift of scaling function. + slope : float + Slope of scaling function. + """ + super().__init__(scaling_config, data_indices) + self.scaling_group = group + self.y_intercept = y_intercept + self.slope = slope + + @abstractmethod + def get_level_scaling(self, variable_level: int) -> float: ... + + def get_variable_scaling(self) -> np.ndarray: + variable_level_scaling = np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32) + + for variable_name, idx in self.data_indices.internal_model.output.name_to_index.items(): + variable_group, _, variable_level = self.get_variable_group(variable_name) + if variable_group != self.scaling_group: + continue + # Apply variable level scaling + assert variable_level is not None, f"Variable {variable_name} has no level to scale." 
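+            # e.g. (hypothetical values) for "q_850" in group "pl" with the ReLU scaler
+            # from the default config (slope=0.001, y_intercept=0.2), this assigns
+            # max(0.2, 850 * 0.001) = 0.85 at this variable's output index.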
+ variable_level_scaling[idx] = self.get_level_scaling(float(variable_level)) + + return variable_level_scaling + + +class LinearVariableLevelScaler(BaseVariableLevelScaler): + """Linear with slope self.slope, yaxis shift by self.y_intercept.""" + + def get_level_scaling(self, variable_level: float) -> np.ndarray: + return variable_level * self.slope + self.y_intercept + + +class ReluVariableLevelScaler(BaseVariableLevelScaler): + """Linear above self.y_intercept, taking constant value self.y_intercept below.""" + + def get_level_scaling(self, variable_level: float) -> np.ndarray: + return max(self.y_intercept, variable_level * self.slope) + + +class PolynomialVariableLevelScaler(BaseVariableLevelScaler): + """Polynomial scaling, (slope * variable_level)^2, yaxis shift by self.y_intercept.""" + + def get_level_scaling(self, variable_level: float) -> np.ndarray: + return (self.slope * variable_level) ** 2 + self.y_intercept + + +class NoVariableLevelScaler(BaseVariableLevelScaler): + """Constant scaling by 1.0.""" + + def __init__( + self, + scaling_config: DictConfig, + data_indices: IndexCollection, + group: str, + slope: float = 0.0, + y_intercept: float = 1.0, + ) -> None: + """Initialise Scaler with constant scaling of 1.""" + assert ( + y_intercept == 1.0 and slope == 0 + ), "self.y_intercept must be 1.0 and self.slope 0.0 for no scaling to fit with definition of linear function." + super().__init__(scaling_config, data_indices, group, slope=0.0, y_intercept=1.0) + + @staticmethod + def get_level_scaling(variable_level: float) -> np.ndarray: + del variable_level # unused + # no scaling, always return 1.0 + return 1.0 From 7ddf6d6cab66659dbe99b6d782058b30841a14e6 Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 27 Dec 2024 10:24:04 +0000 Subject: [PATCH 02/38] config training changes --- .../training/config/training/default.yaml | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index 6c915eb5..33ad631a 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -50,7 +50,7 @@ training_loss: # Available scalars include: # - 'variable': See `variable_loss_scaling` for more information # - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function - scalars: ['variable', 'loss_weights_mask'] + scalars: ['variable', 'variable_pressure_level', 'loss_weights_mask'] ignore_nans: False @@ -109,21 +109,28 @@ lr: # Variable loss scaling # 'variable' must be included in `scalars` in the losses for this to be applied. variable_loss_scaling: + variable_groups: + default: sfc + pl: [q, t, u, v, w, z] default: 1 - pl: - q: 0.6 #1 - t: 6 #1 - u: 0.8 #0.5 - v: 0.5 #0.33 - w: 0.001 - z: 12 #1 - sfc: - sp: 10 - 10u: 0.1 - 10v: 0.1 - 2d: 0.5 - tp: 0.025 - cp: 0.0025 + q: 0.6 #1 + t: 6 #1 + u: 0.8 #0.5 + v: 0.5 #0.33 + w: 0.001 + z: 12 #1 + sp: 10 + 10u: 0.1 + 10v: 0.1 + 2d: 0.5 + tp: 0.025 + cp: 0.0025 + # 'variable_pressure_level' must be included in `scalars` in the losses for this to be applied. 
+ pressure_level_scaler: + _target_: anemoi.training.train.scaling.ReluVariableLevelScaler + group: pl + y_intercept: 0.2 + slope: 0.001 metrics: - z_500 @@ -131,11 +138,6 @@ metrics: - u_850 - v_850 -pressure_level_scaler: - _target_: anemoi.training.data.scaling.ReluPressureLevelScaler - minimum: 0.2 - slope: 0.001 - node_loss_weights: _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute target_nodes: ${graph.data} From 3ddeccc5695d1b6a813248a0742c4d2cea7ca46a Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 27 Dec 2024 12:27:44 +0000 Subject: [PATCH 03/38] avoid multiple scaling --- training/src/anemoi/training/train/scaling.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index f4d50b91..37e10b9d 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -82,10 +82,17 @@ def get_variable_scaling(self) -> np.ndarray: for variable_name, idx in self.data_indices.internal_model.output.name_to_index.items(): _, variable_ref, _ = self.get_variable_group(variable_name) # Apply variable scaling - variable_loss_scaling[idx] = self.scaling_config.get(variable_name, 1.0) * self.scaling_config.get( + variable_loss_scaling[idx] = self.scaling_config.get( variable_ref, 1.0, ) + # TODO(all): do we want to allow scaling by variable ref and variable name?, + # i.e. scale q_50 by value for q_50 AND q + if variable_ref != variable_name: + variable_loss_scaling[idx] *= self.scaling_config.get( + variable_name, + 1.0, + ) return variable_loss_scaling From be4602c1ca78210b664bae41fa9b20e164c18732 Mon Sep 17 00:00:00 2001 From: Sara Hahner <44293258+sahahner@users.noreply.github.com> Date: Tue, 31 Dec 2024 09:56:34 +0100 Subject: [PATCH 04/38] docstring and explain variable reference --- training/src/anemoi/training/train/scaling.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 37e10b9d..bf5f2f1f 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -58,7 +58,11 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: Returns ------- str - Group of the variable. + Group of the variable given in the training-config file. + str + Variable reference which corresponds to the variable name without the variable level + str + Variable level, i.e. pressure level or model level """ split = variable_name.split("_") @@ -81,12 +85,12 @@ def get_variable_scaling(self) -> np.ndarray: for variable_name, idx in self.data_indices.internal_model.output.name_to_index.items(): _, variable_ref, _ = self.get_variable_group(variable_name) - # Apply variable scaling + # Apply variable scaling by base variable name (variable_ref: variable name without variable level) variable_loss_scaling[idx] = self.scaling_config.get( variable_ref, 1.0, ) - # TODO(all): do we want to allow scaling by variable ref and variable name?, + # TODO(all): do we want to allow scaling by variable_ref and variable_name? # i.e. 
scale q_50 by value for q_50 AND q if variable_ref != variable_name: variable_loss_scaling[idx] *= self.scaling_config.get( From 195af07187594c7acff360feacc90aa6f43938af Mon Sep 17 00:00:00 2001 From: Mariana Clare Date: Tue, 31 Dec 2024 10:47:12 +0000 Subject: [PATCH 05/38] fix to config for pressure level scaler --- .../src/anemoi/training/config/training/default.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index 33ad631a..c42f6473 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -126,11 +126,11 @@ variable_loss_scaling: tp: 0.025 cp: 0.0025 # 'variable_pressure_level' must be included in `scalars` in the losses for this to be applied. - pressure_level_scaler: - _target_: anemoi.training.train.scaling.ReluVariableLevelScaler - group: pl - y_intercept: 0.2 - slope: 0.001 +pressure_level_scaler: + _target_: anemoi.training.train.scaling.ReluVariableLevelScaler + group: pl + y_intercept: 0.2 + slope: 0.001 metrics: - z_500 From 2644c186dcb5a1fed3111dd76af232cec1ece11c Mon Sep 17 00:00:00 2001 From: Mariana Clare Date: Tue, 31 Dec 2024 12:15:10 +0000 Subject: [PATCH 06/38] instantiating scalars as a list --- .../training/config/training/default.yaml | 18 +++++++++------- .../src/anemoi/training/train/forecaster.py | 21 ++++++++++++------- training/src/anemoi/training/train/scaling.py | 19 ++++++++++++++++- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index c42f6473..1601e1e5 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -50,7 +50,7 @@ training_loss: # Available scalars include: # - 'variable': See `variable_loss_scaling` for more information # - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function - scalars: ['variable', 'variable_pressure_level', 'loss_weights_mask'] + scalars: ['variable', 'loss_weights_mask'] ignore_nans: False @@ -125,12 +125,16 @@ variable_loss_scaling: 2d: 0.5 tp: 0.025 cp: 0.0025 - # 'variable_pressure_level' must be included in `scalars` in the losses for this to be applied. 
-pressure_level_scaler:
-  _target_: anemoi.training.train.scaling.ReluVariableLevelScaler
-  group: pl
-  y_intercept: 0.2
-  slope: 0.001
+additional_scalars:
+  # pressure level scalar
+  - _target_: anemoi.training.train.scaling.ReluVariableLevelScaler
+    group: pl
+    y_intercept: 0.2
+    slope: 0.001
+    scale_dim: -1 # dimension on which scaling applied
+    name: "variable_pressure_level"
+  # norm tendency scalar (scaling loss function by the normalised tendency values)
+  #- _target_: anemoi.training.data.scaling.NormTendencyScaler

 metrics:
   - z_500
diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py
index 18f6a4bc..929f5f21 100644
--- a/training/src/anemoi/training/train/forecaster.py
+++ b/training/src/anemoi/training/train/forecaster.py
@@ -104,12 +104,15 @@ def __init__(
         ).get_variable_scaling()

         # Instantiate the pressure level scaling class with the training configuration
-        pressurelevelscaler = instantiate(
-            config.training.pressure_level_scaler,
-            scaling_config=config.training.variable_loss_scaling,
-            data_indices=data_indices,
-        )
-        pressure_level_scaling = pressurelevelscaler.get_variable_scaling()
+        config_container = OmegaConf.to_container(config.training.additional_scalars, resolve=False)
+        if isinstance(config_container, list):
+            scalar = [instantiate(
+                scalar_config,
+                scaling_config=config.training.variable_loss_scaling,
+                data_indices=data_indices,
+            )
+            for scalar_config in config_container
+            ]

         self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices)

@@ -131,10 +134,11 @@ def __init__(
         self.scalars = {
             "variable": (-1, variable_scaling),
-            "variable_pressure_level": (-1, pressure_level_scaling),
             "loss_weights_mask": ((-2, -1), torch.ones((1, 1))),
-            "limited_area_mask": (2, limited_area_mask),
+            "limited_area_mask": (2, limited_area_mask)
         }
+        # add additional user-defined scalars
+        [self.scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar]
+
         self.updated_loss_mask = False

         self.loss = self.get_loss_function(config.training.training_loss, scalars=self.scalars, **loss_kwargs)
diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py
index 37e10b9d..93caa961 100644
--- a/training/src/anemoi/training/train/scaling.py
+++ b/training/src/anemoi/training/train/scaling.py
@@ -107,6 +107,8 @@ def __init__(
         group: str,
         y_intercept: float,
         slope: float,
+        name: str,
+        scale_dim: int,
     ) -> None:
         """Initialise variable level scaler.

@@ -127,6 +129,8 @@ def __init__(
         self.scaling_group = group
         self.y_intercept = y_intercept
         self.slope = slope
+        self.name = name
+        self.scale_dim = scale_dim

     @abstractmethod
     def get_level_scaling(self, variable_level: int) -> float: ...
@@ -144,7 +148,6 @@ def get_variable_scaling(self) -> np.ndarray:

         return variable_level_scaling

-
 class LinearVariableLevelScaler(BaseVariableLevelScaler):
     """Linear with slope self.slope, yaxis shift by self.y_intercept."""

@@ -188,3 +191,17 @@ def get_level_scaling(variable_level: float) -> np.ndarray:
         del variable_level  # unused
         # no scaling, always return 1.0
         return 1.0
+
+
+class BaseTendencyScaler(ABC):
+    """Configurable method to scale prognostic variables based on data statistics and statistics_tendencies."""
+
+    @abstractmethod
+    def scaler(self, variable_stdev: float, variable_tendency_stdev: float) -> float: ...
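+    # Sketch of the intended contract (hypothetical numbers): an implementation maps
+    # a variable's climatological stdev and the stdev of its timestep tendencies to
+    # a loss weight, e.g. scaler(10.0, 2.0) -> 10.0 / 2.0 = 5.0 for the norm scaler below.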
+
+
+class NormTendencyScaler(BaseTendencyScaler):
+    """Scale losses by the stdev of tendency statistics."""
+    @staticmethod
+    def scaler(variable_stdev: float, variable_tendency_stdev: float) -> float:
+        return variable_stdev / variable_tendency_stdev
\ No newline at end of file

From 718fc5747188052fff2b9a26f90b67741364394f Mon Sep 17 00:00:00 2001
From: Mariana Clare
Date: Tue, 31 Dec 2024 12:56:55 +0000
Subject: [PATCH 07/38] preparing for tendency losses

---
 .../anemoi/training/config/training/default.yaml  |  2 +-
 training/src/anemoi/training/data/datamodule.py   |  5 +++++
 training/src/anemoi/training/data/dataset.py      | 13 +++++++++++++
 training/src/anemoi/training/train/forecaster.py  |  2 ++
 training/src/anemoi/training/train/train.py       |  1 +
 5 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml
index 1601e1e5..1efa475d 100644
--- a/training/src/anemoi/training/config/training/default.yaml
+++ b/training/src/anemoi/training/config/training/default.yaml
@@ -50,7 +50,7 @@ training_loss:
   # Available scalars include:
   # - 'variable': See `variable_loss_scaling` for more information
   # - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function
-  scalars: ['variable', 'loss_weights_mask']
+  scalars: ['variable', 'variable_pressure_level', 'loss_weights_mask']

   ignore_nans: False

diff --git a/training/src/anemoi/training/data/datamodule.py b/training/src/anemoi/training/data/datamodule.py
index d50af8e9..6f97d0df 100644
--- a/training/src/anemoi/training/data/datamodule.py
+++ b/training/src/anemoi/training/data/datamodule.py
@@ -73,6 +73,10 @@ def __init__(self, config: DictConfig, graph_data: HeteroData) -> None:
     def statistics(self) -> dict:
         return self.ds_train.statistics

+    @cached_property
+    def statistics_tendencies(self) -> dict:
+        return self.ds_train.statistics_tendencies
+
     @cached_property
     def metadata(self) -> dict:
         return self.ds_train.metadata
@@ -183,6 +187,7 @@ def _get_dataset(
             rollout=r,
             multistep=self.config.training.multistep_input,
             timeincrement=self.timeincrement,
+            timestep=self.config.data.timestep,
             shuffle=shuffle,
             grid_indices=self.grid_indices,
             label=label,
diff --git a/training/src/anemoi/training/data/dataset.py b/training/src/anemoi/training/data/dataset.py
index 431f0227..3e58a5e5 100644
--- a/training/src/anemoi/training/data/dataset.py
+++ b/training/src/anemoi/training/data/dataset.py
@@ -41,6 +41,7 @@ def __init__(
         rollout: int = 1,
         multistep: int = 1,
         timeincrement: int = 1,
+        timestep: str = "6h",
         shuffle: bool = True,
         label: str = "generic",
         effective_bs: int = 1,
@@ -57,6 +58,8 @@ def __init__(
             length of rollout window, by default 12
         timeincrement : int, optional
             time increment between samples, by default 1
+        timestep : str, optional
+            the time frequency of the samples, by default '6h'
         multistep : int, optional
             collate (t-1, ...
t - multistep) into the input state vector, by default 1 shuffle : bool, optional @@ -73,6 +76,7 @@ def __init__( self.rollout = rollout self.timeincrement = timeincrement + self.timestep = timestep self.grid_indices = grid_indices # lazy init @@ -104,6 +108,15 @@ def statistics(self) -> dict: """Return dataset statistics.""" return self.data.statistics + @cached_property + def statistics_tendencies(self) -> dict: + """Return dataset tendency statistics.""" + # The statistics_tendencies are lazily loaded + self.data.statistics_tendencies = ( + self.data.statistics_tendencies(self.timestep) if callable(self.data.statistics_tendencies) else None + ) + return self.data.statistics_tendencies + @cached_property def metadata(self) -> dict: """Return dataset metadata.""" diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index 929f5f21..94b42441 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -48,6 +48,7 @@ def __init__( config: DictConfig, graph_data: HeteroData, statistics: dict, + statistics_tendencies: dict, data_indices: IndexCollection, metadata: dict, supporting_arrays: dict, @@ -95,6 +96,7 @@ def __init__( self.latlons_data = graph_data[config.graph.data].x self.node_weights = self.get_node_weights(config, graph_data) self.node_weights = self.output_mask.apply(self.node_weights, dim=0, fill_value=0.0) + self.statistics_tendencies = statistics_tendencies self.logger_enabled = config.diagnostics.log.wandb.enabled or config.diagnostics.log.mlflow.enabled diff --git a/training/src/anemoi/training/train/train.py b/training/src/anemoi/training/train/train.py index d786c13a..9d0e6349 100644 --- a/training/src/anemoi/training/train/train.py +++ b/training/src/anemoi/training/train/train.py @@ -150,6 +150,7 @@ def model(self) -> GraphForecaster: "graph_data": self.graph_data, "metadata": self.metadata, "statistics": self.datamodule.statistics, + "statistics_tendencies": self.datamodule.statistics_tendencies, "supporting_arrays": self.supporting_arrays, } From b91af11f841ab95c6841cc7b0d50465b08f45b98 Mon Sep 17 00:00:00 2001 From: sahahner Date: Thu, 2 Jan 2025 11:48:04 +0000 Subject: [PATCH 08/38] log the variable level scaling information as before --- training/src/anemoi/training/train/scaling.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index e0151300..1f41307f 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -142,6 +142,14 @@ def get_level_scaling(self, variable_level: int) -> float: ... 
def get_variable_scaling(self) -> np.ndarray:
         variable_level_scaling = np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32)

+        LOGGER.info(
+            "Variable Level Scaling: Applying %s scaling to %s variables (%s)",
+            self.name,
+            self.scaling_group,
+            self.variable_groups[self.scaling_group],
+        )
+        LOGGER.info("with slope = %s and y-intercept/minimum = %s.", self.slope, self.y_intercept)
+
         for variable_name, idx in self.data_indices.internal_model.output.name_to_index.items():
             variable_group, _, variable_level = self.get_variable_group(variable_name)
             if variable_group != self.scaling_group:
@@ -152,6 +160,7 @@ def get_variable_scaling(self) -> np.ndarray:

         return variable_level_scaling

+
 class LinearVariableLevelScaler(BaseVariableLevelScaler):
     """Linear with slope self.slope, yaxis shift by self.y_intercept."""

@@ -206,6 +215,7 @@ class NormTendencyScaler(BaseTendencyScaler):
     """Scale losses by the stdev of tendency statistics."""
+
     @staticmethod
     def scaler(variable_stdev: float, variable_tendency_stdev: float) -> float:
-        return variable_stdev / variable_tendency_stdev
\ No newline at end of file
+        return variable_stdev / variable_tendency_stdev

From c22c50b3523a0a7a176f360a7a4ff0fa37a1777c Mon Sep 17 00:00:00 2001
From: Ewan Pinnington
Date: Wed, 8 Jan 2025 15:01:27 +0000
Subject: [PATCH 09/38] adding tendency scaler to additional scalers

---
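The scalers added below weight each prognostic variable by a ratio of its
climatological stdev to the stdev of its timestep tendencies, so slowly evolving
variables (e.g. ocean fields) are not drowned out by fast atmospheric ones. A
rough numeric sketch, with made-up statistics:

    stdev(sst) = 10.0, stdev(6h tendency of sst) = 0.1
    StdevTendencyScaler -> 10.0 / 0.1 = 100.0   # upweights the slow variable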
 .../training/config/training/default.yaml     |  9 ++-
 training/src/anemoi/training/data/dataset.py  |  9 +--
 .../src/anemoi/training/train/forecaster.py   |  8 ++
 training/src/anemoi/training/train/scaling.py | 78 +++++++++++++++++--
 4 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml
index 1efa475d..589b78a1 100644
--- a/training/src/anemoi/training/config/training/default.yaml
+++ b/training/src/anemoi/training/config/training/default.yaml
@@ -133,8 +133,13 @@ additional_scalars:
     slope: 0.001
     scale_dim: -1 # dimension on which scaling applied
     name: "variable_pressure_level"
-  # norm tendency scalar (scaling loss function by the normalised tendency values)
-  #- _target_: anemoi.training.data.scaling.NormTendencyScaler
+  # stdev tendency scalar
+  # scale the prognostic losses by the stdev of the variable tendencies (e.g. the 6-hourly differences of the data)
+  # useful if including slow vs fast evolving variables in the training (e.g. Land/Ocean vs Atmosphere)
+  # if using this option 'variable_loss_scalings' should all be set close to 1.0 for prognostic variables
+  # - _target_: anemoi.training.data.scaling.StdevTendencyScaler
+  #   scale_dim: -1 # dimension on which scaling applied
+  #   name: "tendency"

 metrics:
   - z_500
diff --git a/training/src/anemoi/training/data/dataset.py b/training/src/anemoi/training/data/dataset.py
index 3e58a5e5..07aca06d 100644
--- a/training/src/anemoi/training/data/dataset.py
+++ b/training/src/anemoi/training/data/dataset.py
@@ -111,11 +111,10 @@ def statistics(self) -> dict:
     @cached_property
     def statistics_tendencies(self) -> dict:
         """Return dataset tendency statistics."""
-        # The statistics_tendencies are lazily loaded
-        self.data.statistics_tendencies = (
-            self.data.statistics_tendencies(self.timestep) if callable(self.data.statistics_tendencies) else None
-        )
-        return self.data.statistics_tendencies
+        try:
+            return self.data.statistics_tendencies(self.timestep)
+        except (KeyError, AttributeError):
+            return None

     @cached_property
     def metadata(self) -> dict:
diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py
index 94b42441..6cbd047c 100644
--- a/training/src/anemoi/training/train/forecaster.py
+++ b/training/src/anemoi/training/train/forecaster.py
@@ -112,6 +112,14 @@ def __init__(
                 scalar_config,
                 scaling_config=config.training.variable_loss_scaling,
                 data_indices=data_indices,
+                statistics=statistics,
+                statistics_tendencies=statistics_tendencies,
+            )
+            if scalar_config["name"] == "tendency" else
+            instantiate(
+                scalar_config,
+                scaling_config=config.training.variable_loss_scaling,
+                data_indices=data_indices,
             )
             for scalar_config in config_container
             ]
diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py
index 1f41307f..36a392ff 100644
--- a/training/src/anemoi/training/train/scaling.py
+++ b/training/src/anemoi/training/train/scaling.py
@@ -9,6 +9,7 @@


 import logging
+import warnings
 from abc import ABC
 from abc import abstractmethod

@@ -204,18 +205,81 @@ def get_level_scaling(variable_level: float) -> np.ndarray:
         del variable_level  # unused
         # no scaling, always return 1.0
         return 1.0
+

-class BaseTendencyScaler(ABC):
+class BaseTendencyScaler(BaseVariableLossScaler):
     """Configurable method to scale prognostic variables based on data statistics and statistics_tendencies."""

-    @abstractmethod
-    def scaler(self, variable_stdev: float, variable_tendency_stdev: float) -> float: ...
+    def __init__(
+        self,
+        scaling_config: DictConfig,
+        data_indices: IndexCollection,
+        statistics: dict,
+        statistics_tendencies: dict,
+        name: str,
+        scale_dim: int,
+    ) -> None:
+        """Initialise tendency scaler.
+
+        Parameters
+        ----------
+        scaling_config : DictConfig
+            Configuration for variable loss scaling.
+        data_indices : IndexCollection
+            Collection of data indices.
+        statistics : dict
+            Data statistics dictionary
+        statistics_tendencies : dict
+            Data statistics dictionary for tendencies
+        name : str
+            Name of the scaler.
+        scale_dim : int
+            Dimension on which the scaling is applied.
+        """
+        super().__init__(scaling_config, data_indices)
+        self.statistics = statistics
+        self.statistics_tendencies = statistics_tendencies
+        self.name = name
+        self.scale_dim = scale_dim
+
+        if not self.statistics_tendencies:
+            warnings.warn("Dataset has no tendency statistics! Are you sure you want to use a tendency scaler?")
+
+    @abstractmethod
+    def get_level_scaling(self, variable_level: int) -> float: ...
+
+    def get_variable_scaling(self) -> np.ndarray:
+        variable_level_scaling = np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32)
+
+        LOGGER.info("Tendency Scaling: Applying %s scaling to prognostic variables", self.name)
+
+        for key, idx in self.data_indices.internal_model.output.name_to_index.items():
+            if idx in self.data_indices.internal_model.output.prognostic:
+                prog_idx = self.data_indices.data.output.name_to_index[key]
+                # variable_stdev = 1 / self.model.pre_processors.processors.normalizer._norm_mul[prog_idx]
+                variable_stdev = self.statistics["stdev"][prog_idx] if self.statistics_tendencies else 1
+                variable_tendency_stdev = (
+                    self.statistics_tendencies["stdev"][prog_idx] if self.statistics_tendencies else 1
+                )
+                scaling = self.get_level_scaling(variable_stdev, variable_tendency_stdev)
+                LOGGER.info("Parameter %s is being scaled by statistics_tendencies by %.2f", key, scaling)
+                variable_level_scaling[idx] *= scaling
+
+        return variable_level_scaling
+
+
+class NoTendencyScaler(BaseTendencyScaler):
+    """No scaling by tendency statistics."""
+
+    def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
+        return 1.0
+
+
+class StdevTendencyScaler(BaseTendencyScaler):
+    """Scale losses by the standard deviation of tendency statistics."""
+
+    def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
+        return variable_stdev / variable_tendency_stdev
+
+
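+# Illustrative comparison (hypothetical stdevs): with variable_stdev=8.0 and
+# variable_tendency_stdev=2.0, StdevTendencyScaler above gives 8.0 / 2.0 = 4.0,
+# while the variance-based scaler below gives 8.0 / 2.0**2 = 2.0.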
+class VarTendencyScaler(BaseTendencyScaler):
+    """Scale losses by the variance of tendency statistics."""
+
+    def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
+        return variable_stdev / variable_tendency_stdev**2
\ No newline at end of file

From 1f4a53271af0dfd33e1686b9e9e7e1d7f8d70d1a Mon Sep 17 00:00:00 2001
From: Ewan Pinnington
Date: Wed, 8 Jan 2025 15:14:23 +0000
Subject: [PATCH 10/38] reformatting

---
 .../src/anemoi/training/config/training/default.yaml |  2 +-
 training/src/anemoi/training/train/scaling.py        | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml
index 589b78a1..f5ac39cf 100644
--- a/training/src/anemoi/training/config/training/default.yaml
+++ b/training/src/anemoi/training/config/training/default.yaml
@@ -133,7 +133,7 @@ additional_scalars:
     slope: 0.001
     scale_dim: -1 # dimension on which scaling applied
     name: "variable_pressure_level"
-  # stdev tendency scalar
+  # stdev tendency scaler
   # scale the prognostic losses by the stdev of the variable tendencies (e.g. the 6-hourly differences of the data)
   # useful if including slow vs fast evolving variables in the training (e.g. Land/Ocean vs Atmosphere)
   # if using this option 'variable_loss_scalings' should all be set close to 1.0 for prognostic variables
diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py
index 36a392ff..7224bb9e 100644
--- a/training/src/anemoi/training/train/scaling.py
+++ b/training/src/anemoi/training/train/scaling.py
@@ -205,7 +205,7 @@ def get_level_scaling(variable_level: float) -> np.ndarray:
         del variable_level  # unused
         # no scaling, always return 1.0
         return 1.0
-    
+

 class BaseTendencyScaler(BaseVariableLossScaler):
     """Configurable method to scale prognostic variables based on data statistics and statistics_tendencies."""
@@ -252,7 +252,6 @@ def get_variable_scaling(self) -> np.ndarray:
         for key, idx in self.data_indices.internal_model.output.name_to_index.items():
             if idx in self.data_indices.internal_model.output.prognostic:
                 prog_idx = self.data_indices.data.output.name_to_index[key]
-                # variable_stdev = 1 / self.model.pre_processors.processors.normalizer._norm_mul[prog_idx]
                 variable_stdev = self.statistics["stdev"][prog_idx] if self.statistics_tendencies else 1
                 variable_tendency_stdev = (
                     self.statistics_tendencies["stdev"][prog_idx] if self.statistics_tendencies else 1
                 )
@@ -265,7 +264,8 @@ class NoTendencyScaler(BaseTendencyScaler):

 class NoTendencyScaler(BaseTendencyScaler):
     """No scaling by tendency statistics."""
-    
+
     def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
+        del variable_stdev, variable_tendency_stdev
         return 1.0

@@ -276,10 +276,10 @@ class StdevTendencyScaler(BaseTendencyScaler):

     def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
         return variable_stdev / variable_tendency_stdev
-    
+

 class VarTendencyScaler(BaseTendencyScaler):
     """Scale losses by the variance of tendency statistics."""

     def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
-        return variable_stdev / variable_tendency_stdev**2
\ No newline at end of file
+        return variable_stdev / variable_tendency_stdev**2

From 2843d98377d99f600f02f411eb4859ac2b2c7df Mon Sep 17 00:00:00 2001
From: Ewan Pinnington
Date: Wed, 8 Jan 2025 15:19:45 +0000
Subject: [PATCH 11/38] updating description in configs

---
 training/src/anemoi/training/config/training/default.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml
index f5ac39cf..3ec7888b 100644
--- a/training/src/anemoi/training/config/training/default.yaml
+++ b/training/src/anemoi/training/config/training/default.yaml
@@ -50,6 +50,7 @@ training_loss:
   # Available scalars include:
   # - 'variable': See `variable_loss_scaling` for more information
   # - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function
+  # - 'tendency': See `additional_scalars` for more information
   scalars: ['variable', 'variable_pressure_level', 'loss_weights_mask']

   ignore_nans: False

From c9788716da2e5510556866afa7cbe67e37e6d66a Mon Sep 17 00:00:00 2001
From: Ewan Pinnington
Date: Sun, 12 Jan 2025 09:19:42 +0000
Subject: [PATCH 12/38] updating var-tendency-scaler spec

---
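The previous expression divided a stdev by a squared stdev, so the weight was not
a dimensionless ratio; squaring the numerator as well makes the scaler the square
of the stdev ratio, e.g. (8.0 / 2.0)**2 = 16.0 for the hypothetical values above.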
 training/src/anemoi/training/train/scaling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py
index 7224bb9e..31bbaea7 100644
--- a/training/src/anemoi/training/train/scaling.py
+++ b/training/src/anemoi/training/train/scaling.py
@@ -282,4 +282,4 @@ class VarTendencyScaler(BaseTendencyScaler):
     """Scale losses by the variance of tendency statistics."""

     def get_level_scaling(self, variable_stdev: float, variable_tendency_stdev: float) -> float:
-        return variable_stdev / variable_tendency_stdev**2
+        return variable_stdev**2 / variable_tendency_stdev**2

From f56f9b28d5c2606f09742712b42ec6cec6107bdf Mon Sep 17 00:00:00 2001
From: Ewan Pinnington
Date: Sun, 12 Jan 2025 09:22:20 +0000
Subject: [PATCH 13/38] updating training/default config

---
 training/src/anemoi/training/config/training/default.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml
index 3ec7888b..9023361d 100644
--- a/training/src/anemoi/training/config/training/default.yaml
+++ b/training/src/anemoi/training/config/training/default.yaml
@@ -134,13 +134,18 @@ additional_scalars:
     slope: 0.001
     scale_dim: -1 # dimension on which scaling applied
     name: "variable_pressure_level"
-  # stdev tendency scaler
+  # tendency scalers
   # scale the prognostic losses by the stdev of the variable tendencies (e.g. the 6-hourly differences of the data)
   # useful if including slow vs fast evolving variables in the training (e.g. Land/Ocean vs Atmosphere)
   # if using this option 'variable_loss_scalings' should all be set close to 1.0 for prognostic variables
+  # stdev tendency scaler
   # - _target_: anemoi.training.data.scaling.StdevTendencyScaler
   #   scale_dim: -1 # dimension on which scaling applied
   #   name: "tendency"
+  # var tendency scaler (this should be default!?)
+  # - _target_: anemoi.training.data.scaling.VarTendencyScaler
+  #   scale_dim: -1 # dimension on which scaling applied
+  #   name: "tendency"

 metrics:
   - z_500

From be90000fb02ffc42e58ed664d091762550168abc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 12 Jan 2025 09:22:45 +0000
Subject: [PATCH 14/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 training/src/anemoi/training/data/dataset.py     |  2 +-
 .../src/anemoi/training/train/forecaster.py      | 17 ++++++++++-------
 training/src/anemoi/training/train/train.py      |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/training/src/anemoi/training/data/dataset.py b/training/src/anemoi/training/data/dataset.py
index 07aca06d..5d5addaa 100644
--- a/training/src/anemoi/training/data/dataset.py
+++ b/training/src/anemoi/training/data/dataset.py
@@ -59,7 +59,7 @@ def __init__(
         timeincrement : int, optional
             time increment between samples, by default 1
         timestep : str, optional
-            the time frequency of the samples, by default '6h' 
+            the time frequency of the samples, by default '6h'
         multistep : int, optional
             collate (t-1, ...
t - multistep) into the input state vector, by default 1 shuffle : bool, optional diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index 6cbd047c..a2c25918 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -48,7 +48,7 @@ def __init__( config: DictConfig, graph_data: HeteroData, statistics: dict, - statistics_tendencies: dict, + statistics_tendencies: dict, data_indices: IndexCollection, metadata: dict, supporting_arrays: dict, @@ -108,21 +108,24 @@ def __init__( # Instantiate the pressure level scaling class with the training configuration config_container = OmegaConf.to_container(config.training.additional_scalars, resolve=False) if isinstance(config_container, list): - scalar = [instantiate( + scalar = [ + ( + instantiate( scalar_config, scaling_config=config.training.variable_loss_scaling, data_indices=data_indices, statistics=statistics, statistics_tendencies=statistics_tendencies, ) - if scalar_config["name"] == "tendency" else - instantiate( + if scalar_config["name"] == "tendency" + else instantiate( scalar_config, scaling_config=config.training.variable_loss_scaling, data_indices=data_indices, ) - for scalar_config in config_container - ] + ) + for scalar_config in config_container + ] self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices) @@ -142,7 +145,7 @@ def __init__( self.scalars = { "variable": (-1, variable_scaling), "loss_weights_mask": ((-2, -1), torch.ones((1, 1))), - "limited_area_mask": (2, limited_area_mask) + "limited_area_mask": (2, limited_area_mask), } # add addtional user-defined scalars [self.scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] diff --git a/training/src/anemoi/training/train/train.py b/training/src/anemoi/training/train/train.py index 9d0e6349..6085aba1 100644 --- a/training/src/anemoi/training/train/train.py +++ b/training/src/anemoi/training/train/train.py @@ -150,7 +150,7 @@ def model(self) -> GraphForecaster: "graph_data": self.graph_data, "metadata": self.metadata, "statistics": self.datamodule.statistics, - "statistics_tendencies": self.datamodule.statistics_tendencies, + "statistics_tendencies": self.datamodule.statistics_tendencies, "supporting_arrays": self.supporting_arrays, } From e474ae966b28c6106e7afc476ce799b79fa3221a Mon Sep 17 00:00:00 2001 From: Ewan Pinnington Date: Mon, 13 Jan 2025 11:08:55 +0000 Subject: [PATCH 15/38] updating training/default.yaml --- training/src/anemoi/training/config/training/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index 9023361d..8944f8af 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -139,11 +139,11 @@ additional_scalars: # useful if including slow vs fast evolving variables in the training (e.g. Land/Ocean vs Atmosphere) # if using this option 'variable_loss_scalings' should all be set close to 1.0 for prognostic variables # stdev tendency scaler - # - _target_: anemoi.training.data.scaling.StdevTendencyScaler + # - _target_: anemoi.training.train.scaling.StdevTendencyScaler # scale_dim: -1 # dimension on which scaling applied # name: "tendency" # var tendency scaler (this should be default!?) 
-  # - _target_: anemoi.training.data.scaling.VarTendencyScaler
+  # - _target_: anemoi.training.train.scaling.VarTendencyScaler
   #   scale_dim: -1 # dimension on which scaling applied
   #   name: "tendency"

 metrics:
   - z_500

From 7cdccc5ee60ab3eac8396194848f6cfe2593bf66 Mon Sep 17 00:00:00 2001
From: Mariana Clare
Date: Fri, 17 Jan 2025 09:51:35 +0000
Subject: [PATCH 16/38] first try at tests

---
 training/tests/train/test_loss_scaling.py | 71 ++++++++++++++++++++---
 1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py
index 428dccbb..97526357 100644
--- a/training/tests/train/test_loss_scaling.py
+++ b/training/tests/train/test_loss_scaling.py
@@ -40,7 +40,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
                 "metrics": ["other", "y_850"],
                 "pressure_level_scaler": request.param,
             },
-        },
+        },
     )
     name_to_index = {"x": 0, "y_50": 1, "y_500": 2, "y_850": 3, "z": 5, "q": 4, "other": 6, "d": 7}
     data_indices = IndexCollection(config=config, name_to_index=name_to_index)
@@ -67,10 +67,26 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
     "minimum": 0.2,
     "slope": 0.001,
 }
+std_dev_scaler = {
+    "_target_": "anemoi.training.train.scaling.StdevTendencyScaler",
+    "scale_dim": -1,
+    "name": "tendency"
+}
+var_scaler = {
+    "_target_": "anemoi.training.train.scaling.VarTendencyScaler",
+    "scale_dim": -1,
+    "name": "tendency"
+}
+
+no_tend_scaler = {
+    "_target_": "anemoi.training.train.scaling.NoTendencyScaler",
+    "scale_dim": -1,
+    "name": "no_tendency"
+}
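+# NOTE: these dictionaries mirror entries of `training.additional_scalars` in the
+# default config (test fixtures for the tendency scalers; the hunks in this patch
+# do not yet wire them into a parametrized test).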
 
 expected_linear_scaling = torch.Tensor(
     [
-        50 / 1000 * 0.5,  # y_50
+        50 / 1000 * 0.5,  # y_50
         500 / 1000 * 0.5,  # y_500
         850 / 1000 * 0.5,  # y_850
         1,  # q
@@ -82,7 +98,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
 )
 expected_relu_scaling = torch.Tensor(
     [
-        0.2 * 0.5,  # y_50
+        0.2 * 0.5,  # y_50
         500 / 1000 * 0.5,  # y_500
         850 / 1000 * 0.5,  # y_850
         1,  # q
@@ -94,7 +110,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
 )
 expected_constant_scaling = torch.Tensor(
     [
-        1 * 0.5,  # y_50
+        1 * 0.5,  # y_50
         1 * 0.5,  # y_500
         1 * 0.5,  # y_850
         1,  # q
@@ -106,7 +122,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
 )
 expected_polynomial_scaling = torch.Tensor(
     [
-        ((50 / 1000) ** 2 + 0.2) * 0.5,  # y_50
+        ((50 / 1000) ** 2 + 0.2) * 0.5,  # y_50
         ((500 / 1000) ** 2 + 0.2) * 0.5,  # y_500
         ((850 / 1000) ** 2 + 0.2) * 0.5,  # y_850
         1,  # q
@@ -121,7 +137,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
 @pytest.mark.parametrize(
     ("fake_data", "expected_scaling"),
     [
-        (linear_scaler, expected_linear_scaling),
+        (linear_scaler, expected_linear_scaling),
         (relu_scaler, expected_relu_scaling),
         (constant_scaler, expected_constant_scaling),
         (polynomial_scaler, expected_polynomial_scaling),
@@ -154,7 +170,7 @@ def test_metric_range(fake_data: tuple[DictConfig, IndexCollection]) -> None:
             data_indices.model.output.name_to_index["y_500"],
             data_indices.model.output.name_to_index["y_850"],
         ],
-        "sfc_other": [data_indices.model.output.name_to_index["other"]],
+        "sfc_other": [data_indices.model.output.name_to_index["other"]],
         "sfc_q": [data_indices.model.output.name_to_index["q"]],
         "sfc_z": [data_indices.model.output.name_to_index["z"]],
         "other": [data_indices.model.output.name_to_index["other"]],
@@ -169,3 +185,42 @@ def test_metric_range(fake_data: tuple[DictConfig, IndexCollection]) -> None:

     assert metric_ranges_validation == expected_metric_range_validation
     assert metric_range == expected_metric_range
+
+
+def test_no_tendency_scaling():
+    scaler = NoTendencyScaler()
+    result = scaler.get_level_scaling(10.0, 5.0)
+    assert result == 1.0, "NoTendencyScaler should always return 1.0"
+
+def test_stddev_tendency_scaling():
+    scaler = StdevTendencyScaler()
+    result = scaler.get_level_scaling(10.0, 5.0)
+    expected = 10.0 / 5.0
+    assert pytest.approx(result, rel=1e-5) == expected, "StdevTendencyScaler should return variable_stdev / variable_tendency_stdev"
+
+    # Test with edge case
+    result = scaler.get_level_scaling(0.0, 1.0)
+    assert result == 0.0, "StdevTendencyScaler should return 0.0 when variable_stdev is 0"
+
+    # Test division by a very small number
+    result = scaler.get_level_scaling(1.0, 1e-6)
+    expected = 1.0 / 1e-6
+    assert pytest.approx(result, rel=1e-5) == expected, "StdevTendencyScaler should handle small divisor values"
+
+
+def test_var_tendency_scaling():
+    scaler = VarTendencyScaler()
+    result = scaler.get_level_scaling(10.0, 5.0)
+    expected = (10.0**2) / (5.0**2)
+    assert pytest.approx(result, rel=1e-5) == expected, "VarTendencyScaler should return (variable_stdev^2) / (variable_tendency_stdev^2)"
+
+    # Test with edge case
+    result = scaler.get_level_scaling(0.0, 1.0)
+    assert result == 0.0, "VarTendencyScaler should return 0.0 when variable_stdev is 0"
+
+    # Test division by a very small number
+    result = scaler.get_level_scaling(1.0, 1e-3)
+    expected = (1.0**2) / (1e-3**2)
+    assert pytest.approx(result, rel=1e-5) == expected, "VarTendencyScaler should handle small divisor values"
+
"no_tendency", } expected_linear_scaling = torch.Tensor( [ - 50 / 1000 * 0.5, # y_50 + 50 / 1000 * 0.5, # y_50 500 / 1000 * 0.5, # y_500 850 / 1000 * 0.5, # y_850 1, # q @@ -98,7 +94,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: ) expected_relu_scaling = torch.Tensor( [ - 0.2 * 0.5, # y_50 + 0.2 * 0.5, # y_50 500 / 1000 * 0.5, # y_500 850 / 1000 * 0.5, # y_850 1, # q @@ -110,7 +106,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: ) expected_constant_scaling = torch.Tensor( [ - 1 * 0.5, # y_50 + 1 * 0.5, # y_50 1 * 0.5, # y_500 1 * 0.5, # y_850 1, # q @@ -122,7 +118,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: ) expected_polynomial_scaling = torch.Tensor( [ - ((50 / 1000) ** 2 + 0.2) * 0.5, # y_50 + ((50 / 1000) ** 2 + 0.2) * 0.5, # y_50 ((500 / 1000) ** 2 + 0.2) * 0.5, # y_500 ((850 / 1000) ** 2 + 0.2) * 0.5, # y_850 1, # q @@ -137,7 +133,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: @pytest.mark.parametrize( ("fake_data", "expected_scaling"), [ - (linear_scaler, expected_linear_scaling), + (linear_scaler, expected_linear_scaling), (relu_scaler, expected_relu_scaling), (constant_scaler, expected_constant_scaling), (polynomial_scaler, expected_polynomial_scaling), @@ -170,7 +166,7 @@ def test_metric_range(fake_data: tuple[DictConfig, IndexCollection]) -> None: data_indices.model.output.name_to_index["y_500"], data_indices.model.output.name_to_index["y_850"], ], - "sfc_other": [data_indices.model.output.name_to_index["other"]], + "sfc_other": [data_indices.model.output.name_to_index["other"]], "sfc_q": [data_indices.model.output.name_to_index["q"]], "sfc_z": [data_indices.model.output.name_to_index["z"]], "other": [data_indices.model.output.name_to_index["other"]], @@ -192,11 +188,14 @@ def test_no_tendency_scaling(self): result = scaler.get_level_scaling(10.0, 5.0) assert result == 1.0, "NoTendencyScaler should always return 1.0" + def test_stddev_tendency_scaling(self): scaler = StdevTendencyScaler() result = scaler.get_level_scaling(10.0, 5.0) expected = 10.0 / 5.0 - assert pytest.approx(result, rel=1e-5) == expected, "StdevTendencyScaler should return variable_stdev / variable_tendency_stdev" + assert ( + pytest.approx(result, rel=1e-5) == expected + ), "StdevTendencyScaler should return variable_stdev / variable_tendency_stdev" # Test with edge case result = scaler.get_level_scaling(0.0, 1.0) @@ -212,15 +211,15 @@ def test_get_level_scaling(self): scaler = VarTendencyScaler() result = scaler.get_level_scaling(10.0, 5.0) expected = (10.0**2) / (5.0**2) - assert pytest.approx(result, rel=1e-5) == expected, "VarTendencyScaler should return (variable_stdev^2) / (variable_tendency_stdev^2)" + assert ( + pytest.approx(result, rel=1e-5) == expected + ), "VarTendencyScaler should return (variable_stdev^2) / (variable_tendency_stdev^2)" # Test with edge case result = scaler.get_level_scaling(0.0, 1.0) assert result == 0.0, "VarTendencyScaler should return 0.0 when variable_stdev is 0" - # Test division by a very small number + # Test division by a very small number result = scaler.get_level_scaling(1.0, 1e-3) expected = (1.0**2) / (1e-3**2) assert pytest.approx(result, rel=1e-5) == expected, "VarTendencyScaler should handle small divisor values" - - From 462bb34715852da2f2d562a18e7cda1664357e64 Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 17 Jan 2025 12:07:20 +0000 Subject: [PATCH 18/38] variable name and level from mars metadata --- 
.../src/anemoi/training/train/forecaster.py | 21 +++++++------ training/src/anemoi/training/train/scaling.py | 30 ++++++++++++++----- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index 94b42441..674d1073 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -48,7 +48,7 @@ def __init__( config: DictConfig, graph_data: HeteroData, statistics: dict, - statistics_tendencies: dict, + statistics_tendencies: dict, data_indices: IndexCollection, metadata: dict, supporting_arrays: dict, @@ -103,18 +103,21 @@ def __init__( variable_scaling = GeneralVariableLossScaler( config.training.variable_loss_scaling, data_indices, + metadata["dataset"].get("variables_metadata"), ).get_variable_scaling() # Instantiate the pressure level scaling class with the training configuration config_container = OmegaConf.to_container(config.training.additional_scalars, resolve=False) if isinstance(config_container, list): - scalar = [instantiate( - scalar_config, - scaling_config=config.training.variable_loss_scaling, - data_indices=data_indices, - ) - for scalar_config in config_container - ] + scalar = [ + instantiate( + scalar_config, + scaling_config=config.training.variable_loss_scaling, + data_indices=data_indices, + metadata_dataset=metadata["dataset"].get("variables_metadata"), + ) + for scalar_config in config_container + ] self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices) @@ -134,7 +137,7 @@ def __init__( self.scalars = { "variable": (-1, variable_scaling), "loss_weights_mask": ((-2, -1), torch.ones((1, 1))), - "limited_area_mask": (2, limited_area_mask) + "limited_area_mask": (2, limited_area_mask), } # add addtional user-defined scalars [self.scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 1f41307f..05608d9c 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -23,13 +23,14 @@ class BaseVariableLossScaler(ABC): """Configurable method converting variable to loss scaling.""" - def __init__(self, scaling_config: DictConfig, data_indices: IndexCollection) -> None: + def __init__(self, scaling_config: DictConfig, data_indices: IndexCollection, metadata_dataset: dict) -> None: """Initialise Scaler. Parameters ---------- scaling_config : data_indices : + metadata_dataset : """ self.scaling_config = scaling_config @@ -43,6 +44,7 @@ def __init__(self, scaling_config: DictConfig, data_indices: IndexCollection) -> for variable in variables: self.group_variables[variable] = group self.default_group = self.scaling_config.variable_groups.default + self.metadata_dataset = metadata_dataset @abstractmethod def get_variable_scaling(self) -> np.ndarray: ... @@ -65,11 +67,21 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: Variable level, i.e. 
pressure level or model level """ - split = variable_name.split("_") variable_level = None - if len(split) > 1 and split[-1].isdigit(): - variable_level = int(split[-1]) - variable_name = variable_name[: -len(split[-1]) - 1] + if ( + self.metadata_dataset + and variable_name in self.metadata_dataset + and self.metadata_dataset[variable_name].get("mars") + ): + # if metadata is available: get variable name and level from metadata + variable_level = self.metadata_dataset[variable_name]["mars"].get("levelist") + variable_name = self.metadata_dataset[variable_name]["mars"]["param"] + else: + # if metadata is not available: split variable name into variable name and level + split = variable_name.split("_") + if len(split) > 1 and split[-1].isdigit(): + variable_level = int(split[-1]) + variable_name = variable_name[: -len(split[-1]) - 1] if variable_name in self.group_variables: return self.group_variables[variable_name], variable_name, variable_level return self.default_group, variable_name, variable_level @@ -108,6 +120,7 @@ def __init__( self, scaling_config: DictConfig, data_indices: IndexCollection, + metadata_dataset: dict, group: str, y_intercept: float, slope: float, @@ -122,6 +135,8 @@ def __init__( Configuration for variable loss scaling. data_indices : IndexCollection Collection of data indices. + metadata_dataset : dict + Metadata of the dataset. group : str Group of variables to scale. y_intercept : float @@ -129,7 +144,7 @@ def __init__( slope : float Slope of scaling function. """ - super().__init__(scaling_config, data_indices) + super().__init__(scaling_config, data_indices, metadata_dataset) self.scaling_group = group self.y_intercept = y_intercept self.slope = slope @@ -189,6 +204,7 @@ def __init__( self, scaling_config: DictConfig, data_indices: IndexCollection, + metadata_dataset: dict, group: str, slope: float = 0.0, y_intercept: float = 1.0, @@ -197,7 +213,7 @@ def __init__( assert ( y_intercept == 1.0 and slope == 0 ), "self.y_intercept must be 1.0 and self.slope 0.0 for no scaling to fit with definition of linear function." - super().__init__(scaling_config, data_indices, group, slope=0.0, y_intercept=1.0) + super().__init__(scaling_config, data_indices, metadata_dataset, group, slope=0.0, y_intercept=1.0) @staticmethod def get_level_scaling(variable_level: float) -> np.ndarray: From af10173b93117bcfbcc9e65a9c3d31ba5e22f5d8 Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 17 Jan 2025 12:40:05 +0000 Subject: [PATCH 19/38] get variable group and level in utils file --- training/src/anemoi/training/train/scaling.py | 28 +++------- .../training/utils/variables_metadata.py | 56 +++++++++++++++++++ 2 files changed, 65 insertions(+), 19 deletions(-) create mode 100644 training/src/anemoi/training/utils/variables_metadata.py diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index ae495a62..d1ae6354 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -21,6 +21,8 @@ import numpy as np +from anemoi.training.utils.variables_metadata import get_variable_group_and_level + LOGGER = logging.getLogger(__name__) @@ -42,7 +44,7 @@ def __init__( data_indices : IndexCollection Collection of data indices. metadata_variables : dict, optional - Metadata of the variables in the dataset if available. 
+ Dictionary with variable names as keys and metadata as values, by default None """ self.scaling_config = scaling_config @@ -79,24 +81,12 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: Variable level, i.e. pressure level or model level """ - variable_level = None - if ( - self.metadata_variables - and variable_name in self.metadata_variables - and self.metadata_variables[variable_name].get("mars") - ): - # if metadata is available: get variable name and level from metadata - variable_level = self.metadata_variables[variable_name]["mars"].get("levelist") - variable_name = self.metadata_variables[variable_name]["mars"]["param"] - else: - # if metadata not available: split variable name into variable name and level - split = variable_name.split("_") - if len(split) > 1 and split[-1].isdigit(): - variable_level = int(split[-1]) - variable_name = variable_name[: -len(split[-1]) - 1] - if variable_name in self.group_variables: - return self.group_variables[variable_name], variable_name, variable_level - return self.default_group, variable_name, variable_level + return get_variable_group_and_level( + variable_name, + self.group_variables, + self.metadata_variables, + self.default_group, + ) class GeneralVariableLossScaler(BaseVariableLossScaler): diff --git a/training/src/anemoi/training/utils/variables_metadata.py b/training/src/anemoi/training/utils/variables_metadata.py new file mode 100644 index 00000000..6b195738 --- /dev/null +++ b/training/src/anemoi/training/utils/variables_metadata.py @@ -0,0 +1,56 @@ +# (C) Copyright 2024 Anemoi contributors. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from __future__ import annotations + + +def get_variable_group_and_level( + variable_name: str, + group_variables: dict, + metadata_variables: dict | None = None, + default_group: str = "sfc", +) -> tuple[str, str, int]: + """Get the group and level of a variable. + + Parameters + ---------- + variable_name : str + Name of the variable. + + group_variables : dict + Dictionary with variable names as keys and groups as values. + metadata_variables : dict, optional + Dictionary with variable names as keys and metadata as values, by default None + default_group : str, optional + Default group to return if the variable is not found in the group_variables dictionary, by default "sfc" + + Returns + ------- + str + Group of the variable given in the training-config file. + str + Variable reference which corresponds to the variable name without the variable level + str + Variable level, i.e. 
pressure level or model level + + """ + variable_level = None + if metadata_variables and variable_name in metadata_variables and metadata_variables[variable_name].get("mars"): + # if metadata is available: get variable name and level from metadata + variable_level = metadata_variables[variable_name]["mars"].get("levelist") + variable_name = metadata_variables[variable_name]["mars"]["param"] + else: + # if metadata not available: split variable name into variable name and level + split = variable_name.split("_") + if len(split) > 1 and split[-1].isdigit(): + variable_level = int(split[-1]) + variable_name = variable_name[: -len(split[-1]) - 1] + if variable_name in group_variables: + return group_variables[variable_name], variable_name, variable_level + return default_group, variable_name, variable_level From 395cd6f6df1dd87f5562a9ce6b3b1b206d7f619c Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 17 Jan 2025 12:41:32 +0000 Subject: [PATCH 20/38] empty line --- training/src/anemoi/training/utils/variables_metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/training/src/anemoi/training/utils/variables_metadata.py b/training/src/anemoi/training/utils/variables_metadata.py index 6b195738..4a149a65 100644 --- a/training/src/anemoi/training/utils/variables_metadata.py +++ b/training/src/anemoi/training/utils/variables_metadata.py @@ -22,7 +22,6 @@ def get_variable_group_and_level( ---------- variable_name : str Name of the variable. - group_variables : dict Dictionary with variable names as keys and groups as values. metadata_variables : dict, optional From 1f53a82c6bbf69ee4c1848b143b581e4e828afac Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 17 Jan 2025 15:40:24 +0000 Subject: [PATCH 21/38] convert test for new structure. pressure level and general variable scaling --- training/src/anemoi/training/train/scaling.py | 15 ++- training/tests/train/test_loss_scaling.py | 123 +++++++++++++----- 2 files changed, 104 insertions(+), 34 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index d1ae6354..2a95c6ee 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -208,14 +208,25 @@ def __init__( data_indices: IndexCollection, metadata_variables: dict, group: str, - slope: float = 0.0, y_intercept: float = 1.0, + slope: float = 0.0, + name: str | None = None, + scale_dim: int | None = None, ) -> None: """Initialise Scaler with constant scaling of 1.""" assert ( y_intercept == 1.0 and slope == 0 ), "self.y_intercept must be 1.0 and self.slope 0.0 for no scaling to fit with definition of linear function."
- super().__init__(scaling_config, data_indices, metadata_variables, group, slope=0.0, y_intercept=1.0) + super().__init__( + scaling_config, + data_indices, + metadata_variables, + group, + y_intercept=1.0, + slope=0.0, + name=name, + scale_dim=scale_dim, + ) @staticmethod def get_level_scaling(variable_level: float) -> np.ndarray: diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index 048b4d87..142165f2 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -11,10 +11,15 @@ import pytest import torch from _pytest.fixtures import SubRequest +from hydra.utils import instantiate from omegaconf import DictConfig from anemoi.models.data_indices.collection import IndexCollection from anemoi.training.train.forecaster import GraphForecaster +from anemoi.training.train.scaling import GeneralVariableLossScaler +from anemoi.training.train.scaling import NoTendencyScaler +from anemoi.training.train.scaling import StdevTendencyScaler +from anemoi.training.train.scaling import VarTendencyScaler @pytest.fixture @@ -30,15 +35,17 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: }, "training": { "variable_loss_scaling": { - "default": 1, - "sfc": { - "z": 0.1, - "other": 100, + "variable_groups": { + "default": "sfc", + "pl": ["y"], }, - "pl": {"y": 0.5}, + "default": 1, + "z": 0.1, + "other": 100, + "y": 0.5, }, "metrics": ["other", "y_850"], - "pressure_level_scaler": request.param, + "additional_scalars": request.param, }, }, ) @@ -47,26 +54,47 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: return config, data_indices -linear_scaler = { - "_target_": "anemoi.training.data.scaling.LinearPressureLevelScaler", - "minimum": 0.0, - "slope": 0.001, -} -relu_scaler = { - "_target_": "anemoi.training.data.scaling.ReluPressureLevelScaler", - "minimum": 0.2, - "slope": 0.001, -} -constant_scaler = { - "_target_": "anemoi.training.data.scaling.NoPressureLevelScaler", - "minimum": 1.0, - "slope": 0.0, -} -polynomial_scaler = { - "_target_": "anemoi.training.data.scaling.PolynomialPressureLevelScaler", - "minimum": 0.2, - "slope": 0.001, -} +linear_scaler = [ + { + "_target_": "anemoi.training.train.scaling.LinearVariableLevelScaler", + "group": "pl", + "y_intercept": 0.0, + "slope": 0.001, + "scale_dim": -1, + "name": "variable_pressure_level", + }, +] +relu_scaler = [ + { + "_target_": "anemoi.training.train.scaling.ReluVariableLevelScaler", + "group": "pl", + "y_intercept": 0.2, + "slope": 0.001, + "scale_dim": -1, + "name": "variable_pressure_level", + }, +] +constant_scaler = [ + { + "_target_": "anemoi.training.train.scaling.NoVariableLevelScaler", + "group": "pl", + "y_intercept": 1.0, + "slope": 0.0, + "scale_dim": -1, + "name": "variable_pressure_level", + }, +] +polynomial_scaler = [ + { + "_target_": "anemoi.training.train.scaling.PolynomialVariableLevelScaler", + "group": "pl", + "y_intercept": 0.2, + "slope": 0.001, + "scale_dim": -1, + "name": "variable_pressure_level", + }, +] + std_dev_scaler = { "- _target_": "anemoi.training.train.scaling.StdevTendencyScaler", "scale_dim": -1, @@ -145,10 +173,40 @@ def test_variable_loss_scaling_vals( expected_scaling: torch.Tensor, ) -> None: config, data_indices = fake_data - breakpoint() - variable_loss_scaling = GraphForecaster.get_variable_scaling(config, data_indices) + variable_scaling = GeneralVariableLossScaler( + config.training.variable_loss_scaling, + data_indices, + ).get_variable_scaling() + + 
scalar = [ + ( + instantiate( + scalar_config, + scaling_config=config.training.variable_loss_scaling, + data_indices=data_indices, + statistics=None, + statistics_tendencies=None, + ) + if scalar_config["name"] == "tendency" + else instantiate( + scalar_config, + scaling_config=config.training.variable_loss_scaling, + data_indices=data_indices, + metadata_variables=None, + ) + ) + for scalar_config in config.training.additional_scalars + ] + + scalars = { + "variable": (-1, variable_scaling), + } + # add addtional user-defined scalars + [scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] + keys_list = list(scalars.keys()) + scalars[keys_list[0]][1] * scalars[keys_list[1]][1] - assert torch.allclose(variable_loss_scaling, expected_scaling) + assert torch.allclose(torch.tensor(scalars[keys_list[0]][1] * scalars[keys_list[1]][1]), expected_scaling) @pytest.mark.parametrize("fake_data", [linear_scaler], indirect=["fake_data"]) @@ -183,13 +241,14 @@ def test_metric_range(fake_data: tuple[DictConfig, IndexCollection]) -> None: assert metric_range == expected_metric_range -def test_no_tendency_scaling(self): +# TODO(Mariana): Add tests for the following classes +def test_no_tendency_scaling() -> None: scaler = NoTendencyScaler() result = scaler.get_level_scaling(10.0, 5.0) assert result == 1.0, "NoTendencyScaler should always return 1.0" -def test_stddev_tendency_scaling(self): +def test_stddev_tendency_scaling() -> None: scaler = StdevTendencyScaler() result = scaler.get_level_scaling(10.0, 5.0) expected = 10.0 / 5.0 @@ -207,7 +266,7 @@ def test_stddev_tendency_scaling(self): assert pytest.approx(result, rel=1e-5) == expected, "StdevTendencyScaler should handle small divisor values" -def test_get_level_scaling(self): +def test_get_level_scaling() -> None: scaler = VarTendencyScaler() result = scaler.get_level_scaling(10.0, 5.0) expected = (10.0**2) / (5.0**2) From 374795903967849be42affdf675ef9c68e167c52 Mon Sep 17 00:00:00 2001 From: sahahner Date: Fri, 17 Jan 2025 15:50:32 +0000 Subject: [PATCH 22/38] more plausible check for availability of mars metadata --- training/src/anemoi/training/utils/variables_metadata.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/training/src/anemoi/training/utils/variables_metadata.py b/training/src/anemoi/training/utils/variables_metadata.py index 4a149a65..5dca0e62 100644 --- a/training/src/anemoi/training/utils/variables_metadata.py +++ b/training/src/anemoi/training/utils/variables_metadata.py @@ -40,7 +40,10 @@ def get_variable_group_and_level( """ variable_level = None - if metadata_variables and variable_name in metadata_variables and metadata_variables[variable_name].get("mars"): + mars_metadata_available = ( + metadata_variables and variable_name in metadata_variables and metadata_variables[variable_name].get("mars") + ) + if mars_metadata_available and metadata_variables[variable_name]["mars"].get("param"): # if metadata is available: get variable name and level from metadata variable_level = metadata_variables[variable_name]["mars"].get("levelist") variable_name = metadata_variables[variable_name]["mars"]["param"] From 68cd6e377e682f3c3e58c67d3bde355b096fed56 Mon Sep 17 00:00:00 2001 From: Mariana Clare Date: Fri, 17 Jan 2025 21:44:26 +0000 Subject: [PATCH 23/38] update to tendency tests (still not working) --- training/tests/train/test_loss_scaling.py | 134 ++++++++++++---------- 1 file changed, 72 insertions(+), 62 deletions(-) diff --git 
a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index 142165f2..ca61d297 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -17,9 +17,6 @@ from anemoi.models.data_indices.collection import IndexCollection from anemoi.training.train.forecaster import GraphForecaster from anemoi.training.train.scaling import GeneralVariableLossScaler -from anemoi.training.train.scaling import NoTendencyScaler -from anemoi.training.train.scaling import StdevTendencyScaler -from anemoi.training.train.scaling import VarTendencyScaler @pytest.fixture @@ -51,7 +48,9 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: ) name_to_index = {"x": 0, "y_50": 1, "y_500": 2, "y_850": 3, "z": 5, "q": 4, "other": 6, "d": 7} data_indices = IndexCollection(config=config, name_to_index=name_to_index) - return config, data_indices + statistics = {"stdev": [10.0, 10, 10, 7.0, 3.0, 1.0, 2.0, 3.5, 3.5]} + statistics_tendencies = {"stdev": [5, 5, 5, 4.0, 7.5, 8.6, 1, 10, 10]} + return config, data_indices, statistics, statistics_tendencies linear_scaler = [ @@ -95,18 +94,29 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: }, ] -std_dev_scaler = { - "- _target_": "anemoi.training.train.scaling.StdevTendencyScaler", - "scale_dim": -1, - "name": "tendency", -} -var_scaler = {"- _target_": "anemoi.training.train.scaling.VarTendencyScaler", "scale_dim": -1, "name": "tendency"} +std_dev_scaler = [ + { + "_target_": "anemoi.training.train.scaling.StdevTendencyScaler", + "name": "tendency", + "scale_dim": -1, + }, +] + +var_scaler = [ + { + "_target_": "anemoi.training.train.scaling.VarTendencyScaler", + "name": "tendency", + "scale_dim": -1, + }, +] -no_tend_scaler = { - "- _target_": "anemoi.training.train.scaling.NoTendencyScaler", - "scale_dim": -1, - "name": "no_tendency", -} +no_tend_scaler = [ + { + "_target_": "anemoi.training.train.scaling.NoTendencyScaler", + "name": "tendency", + "scale_dim": -1, + }, +] expected_linear_scaling = torch.Tensor( [ @@ -157,6 +167,45 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: ], ) +expected_no_tendency_scaling = torch.Tensor( + [ + 1 * 0.5, # y_50 + 1 * 0.5, # y_500 + 1 * 0.5, # y_850 + 1 * 1, # q + 1 * 0.1, # z + 1 * 100, # other + 1 * 1, # cos_d + 1 * 1, # sin_d + ], +) + +expected_stdev_tendency_scaling = torch.Tensor( + [ + (10.0 / 5.0) * 0.5, # y_50 + (10.0 / 5.0) * 0.5, # y_500 + (10.0 / 5.0) * 0.5, # y_850 + (7.0 / 4) * 1, # q + (3.0 / 7.5) * 0.1, # z + (1.0 / 8.6) * 100, # other + (3.5 / 10) * 1, # cos_d + (3.5 / 10) * 1, # sin_d + ], +) + +expected_var_tendency_scaling = torch.Tensor( + [ + (10.0**2) / (5.0**2) * 0.5, # y_50 + (10.0**2) / (5.0**2) * 0.5, # y_500 + (10.0**2) / (5.0**2) * 0.5, # y_850 + (7.0**2) / (4**2) * 1, # q + (3.0**2) / (7.5**2) * 0.1, # z + (1.0**2) / (8.6**2) * 100, # other + (3.5**2) / (10**2) * 1, # cos_d + (3.5**2) / (10**2) * 1, # sin_d + ], +) + @pytest.mark.parametrize( ("fake_data", "expected_scaling"), @@ -165,14 +214,17 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: (relu_scaler, expected_relu_scaling), (constant_scaler, expected_constant_scaling), (polynomial_scaler, expected_polynomial_scaling), + (no_tend_scaler, expected_no_tendency_scaling), + (std_dev_scaler, expected_stdev_tendency_scaling), + (var_scaler, expected_var_tendency_scaling), ], indirect=["fake_data"], ) def test_variable_loss_scaling_vals( - fake_data: tuple[DictConfig, 
IndexCollection], + fake_data: tuple[DictConfig, IndexCollection, torch.Tensor, torch.Tensor], expected_scaling: torch.Tensor, ) -> None: - config, data_indices = fake_data + config, data_indices, statistics, statistics_tendencies = fake_data variable_scaling = GeneralVariableLossScaler( config.training.variable_loss_scaling, data_indices, @@ -184,8 +236,8 @@ def test_variable_loss_scaling_vals( scalar_config, scaling_config=config.training.variable_loss_scaling, data_indices=data_indices, - statistics=None, - statistics_tendencies=None, + statistics=statistics, + statistics_tendencies=statistics_tendencies, ) if scalar_config["name"] == "tendency" else instantiate( @@ -202,6 +254,7 @@ def test_variable_loss_scaling_vals( "variable": (-1, variable_scaling), } # add addtional user-defined scalars + [scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] keys_list = list(scalars.keys()) scalars[keys_list[0]][1] * scalars[keys_list[1]][1] @@ -239,46 +292,3 @@ def test_metric_range(fake_data: tuple[DictConfig, IndexCollection]) -> None: assert metric_ranges_validation == expected_metric_range_validation assert metric_range == expected_metric_range - - -# TODO(Mariana): Add tests for the following classes -def test_no_tendency_scaling() -> None: - scaler = NoTendencyScaler() - result = scaler.get_level_scaling(10.0, 5.0) - assert result == 1.0, "NoTendencyScaler should always return 1.0" - - -def test_stddev_tendency_scaling() -> None: - scaler = StdevTendencyScaler() - result = scaler.get_level_scaling(10.0, 5.0) - expected = 10.0 / 5.0 - assert ( - pytest.approx(result, rel=1e-5) == expected - ), "StdevTendencyScaler should return variable_stdev / variable_tendency_stdev" - - # Test with edge case - result = scaler.get_level_scaling(0.0, 1.0) - assert result == 0.0, "StdevTendencyScaler should return 0.0 when variable_stdev is 0" - - # Test division by a very small number - result = scaler.get_level_scaling(1.0, 1e-6) - expected = 1.0 / 1e-6 - assert pytest.approx(result, rel=1e-5) == expected, "StdevTendencyScaler should handle small divisor values" - - -def test_get_level_scaling() -> None: - scaler = VarTendencyScaler() - result = scaler.get_level_scaling(10.0, 5.0) - expected = (10.0**2) / (5.0**2) - assert ( - pytest.approx(result, rel=1e-5) == expected - ), "VarTendencyScaler should return (variable_stdev^2) / (variable_tendency_stdev^2)" - - # Test with edge case - result = scaler.get_level_scaling(0.0, 1.0) - assert result == 0.0, "VarTendencyScaler should return 0.0 when variable_stdev is 0" - - # Test division by a very small number - result = scaler.get_level_scaling(1.0, 1e-3) - expected = (1.0**2) / (1e-3**2) - assert pytest.approx(result, rel=1e-5) == expected, "VarTendencyScaler should handle small divisor values" From d6e127a7694429b677ac1337609954c7d7c110e1 Mon Sep 17 00:00:00 2001 From: Mariana Clare Date: Mon, 20 Jan 2025 14:07:20 +0000 Subject: [PATCH 24/38] tendency scaler tests now working --- training/src/anemoi/training/train/scaling.py | 2 +- training/tests/train/test_loss_scaling.py | 21 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 2a95c6ee..0d2f39f4 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -279,7 +279,7 @@ def get_variable_scaling(self) -> np.ndarray: for key, idx in 
self.data_indices.internal_model.output.name_to_index.items(): if idx in self.data_indices.internal_model.output.prognostic: - prog_idx = self.data_indices.data.output.name_to_index[key] + prog_idx = self.data_indices.internal_model.output.name_to_index[key] variable_stdev = self.statistics["stdev"][prog_idx] if self.statistics_tendencies else 1 variable_tendency_stdev = ( self.statistics_tendencies["stdev"][prog_idx] if self.statistics_tendencies else 1 diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index ca61d297..bce496a2 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -48,8 +48,8 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: ) name_to_index = {"x": 0, "y_50": 1, "y_500": 2, "y_850": 3, "z": 5, "q": 4, "other": 6, "d": 7} data_indices = IndexCollection(config=config, name_to_index=name_to_index) - statistics = {"stdev": [10.0, 10, 10, 7.0, 3.0, 1.0, 2.0, 3.5, 3.5]} - statistics_tendencies = {"stdev": [5, 5, 5, 4.0, 7.5, 8.6, 1, 10, 10]} + statistics = {"stdev": [10.0, 10, 10, 7.0, 3.0, 1.0, 2.0, 3.5]} + statistics_tendencies = {"stdev": [5, 5, 5, 4.0, 7.5, 8.6, 1, 10]} return config, data_indices, statistics, statistics_tendencies @@ -185,10 +185,10 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: (10.0 / 5.0) * 0.5, # y_50 (10.0 / 5.0) * 0.5, # y_500 (10.0 / 5.0) * 0.5, # y_850 - (7.0 / 4) * 1, # q - (3.0 / 7.5) * 0.1, # z - (1.0 / 8.6) * 100, # other - (3.5 / 10) * 1, # cos_d + 1, # q + 0.1, # z + (1/ 8.6) * 100, # other + (2 / 1) * 1, # cos_d (3.5 / 10) * 1, # sin_d ], ) @@ -198,10 +198,10 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: (10.0**2) / (5.0**2) * 0.5, # y_50 (10.0**2) / (5.0**2) * 0.5, # y_500 (10.0**2) / (5.0**2) * 0.5, # y_850 - (7.0**2) / (4**2) * 1, # q - (3.0**2) / (7.5**2) * 0.1, # z - (1.0**2) / (8.6**2) * 100, # other - (3.5**2) / (10**2) * 1, # cos_d + 1, # q + 0.1, # z + (1**2) / (8.6**2) * 100, # other + (2**2) / (1**2) * 1, # cos_d (3.5**2) / (10**2) * 1, # sin_d ], ) @@ -258,7 +258,6 @@ def test_variable_loss_scaling_vals( [scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] keys_list = list(scalars.keys()) scalars[keys_list[0]][1] * scalars[keys_list[1]][1] - assert torch.allclose(torch.tensor(scalars[keys_list[0]][1] * scalars[keys_list[1]][1]), expected_scaling) From fd29cbceffd1f7379d947c11af3e1dba0be4501e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Jan 2025 14:07:42 +0000 Subject: [PATCH 25/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- training/tests/train/test_loss_scaling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index bce496a2..c2a020a0 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -186,8 +186,8 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: (10.0 / 5.0) * 0.5, # y_500 (10.0 / 5.0) * 0.5, # y_850 1, # q - 0.1, # z - (1/ 8.6) * 100, # other + 0.1, # z + (1 / 8.6) * 100, # other (2 / 1) * 1, # cos_d (3.5 / 10) * 1, # sin_d ], @@ -199,7 +199,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]: (10.0**2) / (5.0**2) * 0.5, # y_500 (10.0**2) / 
(5.0**2) * 0.5, # y_850 1, # q - 0.1, # z + 0.1, # z (1**2) / (8.6**2) * 100, # other (2**2) / (1**2) * 1, # cos_d (3.5**2) / (10**2) * 1, # sin_d From 8bff68bcf933e88c74e369df4be4e6d4b8a2c5a6 Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 09:27:30 +0000 Subject: [PATCH 26/38] change function into class, extracting variable group and name --- training/src/anemoi/training/train/scaling.py | 13 +-- .../training/utils/variables_metadata.py | 88 +++++++++++-------- 2 files changed, 61 insertions(+), 40 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 2a95c6ee..d04ce8d8 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -21,7 +21,7 @@ import numpy as np -from anemoi.training.utils.variables_metadata import get_variable_group_and_level +from anemoi.training.utils.variables_metadata import ExtractVariableGroupAndLevel LOGGER = logging.getLogger(__name__) @@ -60,6 +60,12 @@ def __init__( self.default_group = self.scaling_config.variable_groups.default self.metadata_variables = metadata_variables + self.ExtractVariableGroupAndLevel = ExtractVariableGroupAndLevel( + self.group_variables, + self.metadata_variables, + self.default_group, + ) + @abstractmethod def get_variable_scaling(self) -> np.ndarray: ... @@ -81,11 +87,8 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: Variable level, i.e. pressure level or model level """ - return get_variable_group_and_level( + return self.extract_variable_group_and_level.get_group_and_level( variable_name, - self.group_variables, - self.metadata_variables, - self.default_group, ) diff --git a/training/src/anemoi/training/utils/variables_metadata.py b/training/src/anemoi/training/utils/variables_metadata.py index 5dca0e62..71648016 100644 --- a/training/src/anemoi/training/utils/variables_metadata.py +++ b/training/src/anemoi/training/utils/variables_metadata.py @@ -10,18 +10,14 @@ from __future__ import annotations -def get_variable_group_and_level( - variable_name: str, - group_variables: dict, - metadata_variables: dict | None = None, - default_group: str = "sfc", -) -> tuple[str, str, int]: - """Get the group and level of a variable. +class ExtractVariableGroupAndLevel: + """Extract the group and level of a variable from dataset metadata and training-config file. + + Extract variables group from the training-config file and variable level from the dataset metadata. + If dataset metadata is not available, the variable level is extracted from the variable name. Parameters ---------- - variable_name : str - Name of the variable. group_variables : dict Dictionary with variable names as keys and groups as values. metadata_variables : dict, optional @@ -29,30 +25,52 @@ def get_variable_group_and_level( default_group : str, optional Default group to return if the variable is not found in the group_variables dictionary, by default "sfc" - Returns - ------- - str - Group of the variable given in the training-config file. - str - Variable reference which corresponds to the variable name without the variable level - str - Variable level, i.e. 
pressure level or model level - """ - variable_level = None - mars_metadata_available = ( - metadata_variables and variable_name in metadata_variables and metadata_variables[variable_name].get("mars") - ) - if mars_metadata_available and metadata_variables[variable_name]["mars"].get("param"): - # if metadata is available: get variable name and level from metadata - variable_level = metadata_variables[variable_name]["mars"].get("levelist") - variable_name = metadata_variables[variable_name]["mars"]["param"] - else: - # if metadata not available: split variable name into variable name and level - split = variable_name.split("_") - if len(split) > 1 and split[-1].isdigit(): - variable_level = int(split[-1]) - variable_name = variable_name[: -len(split[-1]) - 1] - if variable_name in group_variables: - return group_variables[variable_name], variable_name, variable_level - return default_group, variable_name, variable_level + + def __init__( + self, + group_variables: dict, + metadata_variables: dict | None = None, + default_group: str = "sfc", + ) -> None: + self.group_variables = group_variables + self.metadata_variables = metadata_variables + self.default_group = default_group + + def get_group_and_level(self, variable_name: str) -> tuple[str, str, int]: + """Get the group and level of a variable. + + Parameters + ---------- + variable_name : str + Name of the variable. + + Returns + ------- + str + Group of the variable given in the training-config file. + str + Variable reference which corresponds to the variable name without the variable level + str + Variable level, i.e. pressure level or model level + + """ + variable_level = None + mars_metadata_available = ( + self.metadata_variables + and variable_name in self.metadata_variables + and self.metadata_variables[variable_name].get("mars") + ) + if mars_metadata_available and self.metadata_variables[variable_name]["mars"].get("param"): + # if metadata is available: get variable name and level from metadata + variable_level = self.metadata_variables[variable_name]["mars"].get("levelist") + variable_name = self.metadata_variables[variable_name]["mars"]["param"] + else: + # if metadata not available: split variable name into variable name and level + split = variable_name.split("_") + if len(split) > 1 and split[-1].isdigit(): + variable_level = int(split[-1]) + variable_name = variable_name[: -len(split[-1]) - 1] + if variable_name in self.group_variables: + return self.group_variables[variable_name], variable_name, variable_level + return self.default_group, variable_name, variable_level From 7d8c76dabd7a12f8c3b0a10e25af3b5b1c5e4ded Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 09:34:13 +0000 Subject: [PATCH 27/38] correct function call --- training/src/anemoi/training/train/scaling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 2754b890..72a46de3 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -87,7 +87,7 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: Variable level, i.e. 
pressure level or model level """ - return self.extract_variable_group_and_level.get_group_and_level( + return self.ExtractVariableGroupAndLevel.get_group_and_level( variable_name, ) From d928b30e9dd4b45289770e2c81ee86da232288b6 Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 09:34:36 +0000 Subject: [PATCH 28/38] correct typo in test --- training/tests/train/test_loss_scaling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index c2a020a0..b14560ce 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -263,7 +263,7 @@ def test_variable_loss_scaling_vals( @pytest.mark.parametrize("fake_data", [linear_scaler], indirect=["fake_data"]) def test_metric_range(fake_data: tuple[DictConfig, IndexCollection]) -> None: - config, data_indices = fake_data + config, data_indices, _, _ = fake_data metric_range, metric_ranges_validation = GraphForecaster.get_val_metric_ranges(config, data_indices) From bb054ce39c1c3f5dfb85e6e4f5b54c25c2a8fa1f Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 13:35:51 +0000 Subject: [PATCH 29/38] incorporate comments --- training/src/anemoi/training/train/scaling.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 72a46de3..f35d4f31 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -60,14 +60,16 @@ def __init__( self.default_group = self.scaling_config.variable_groups.default self.metadata_variables = metadata_variables - self.ExtractVariableGroupAndLevel = ExtractVariableGroupAndLevel( + self.extract_variable_group_and_level = ExtractVariableGroupAndLevel( self.group_variables, self.metadata_variables, self.default_group, ) @abstractmethod - def get_variable_scaling(self) -> np.ndarray: ... + def get_variable_scaling(self) -> np.ndarray: + """Get the scaling of the variables.""" + ... def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: """Get the group of a variable. @@ -87,13 +89,13 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: Variable level, i.e. 
pressure level or model level """ - return self.ExtractVariableGroupAndLevel.get_group_and_level( + return self.extract_variable_group_and_level.get_group_and_level( variable_name, ) class GeneralVariableLossScaler(BaseVariableLossScaler): - """General scaling of variables to loss scaling.""" + """Scaling per variable defined in config file.""" def get_variable_scaling(self) -> np.ndarray: variable_loss_scaling = ( From d0046fa8ba330deaa2480c3eca48a689c844f3e3 Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 13:44:19 +0000 Subject: [PATCH 30/38] introduce base class for all loss scalings --- .../src/anemoi/training/train/forecaster.py | 4 +- training/src/anemoi/training/train/scaling.py | 42 +++++++++++++------ training/tests/train/test_loss_scaling.py | 4 +- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index 44d536d3..91bc35d6 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -104,7 +104,7 @@ def __init__( config.training.variable_loss_scaling, data_indices, metadata["dataset"].get("variables_metadata"), - ).get_variable_scaling() + ).get_scaling() # Instantiate the pressure level scaling class with the training configuration config_container = OmegaConf.to_container(config.training.additional_scalars, resolve=False) @@ -150,7 +150,7 @@ def __init__( "limited_area_mask": (2, limited_area_mask), } # add addtional user-defined scalars - [self.scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] + [self.scalars.update({scale.name: (scale.scale_dim, scale.get_scaling())}) for scale in scalar] self.updated_loss_mask = False diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index f35d4f31..8bde19a2 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -26,8 +26,30 @@ LOGGER = logging.getLogger(__name__) -class BaseVariableLossScaler(ABC): - """Configurable method converting variable to loss scaling.""" +class BaseScaler(ABC): + """Base class for all loss scalers.""" + + def __init__(self, scaling_config: DictConfig, data_indices: IndexCollection) -> None: + """Initialise BaseScaler. + + Parameters + ---------- + scaling_config : DictConfig + Configuration for loss scaling. + data_indices : IndexCollection + Collection of data indices. + """ + self.scaling_config = scaling_config + self.data_indices = data_indices + + @abstractmethod + def get_scaling(self) -> np.ndarray: + """Abstract method to get loss scaling.""" + ... + + +class BaseVariableLossScaler(BaseScaler): + """Base class for all variable loss scalers.""" def __init__( self, @@ -47,8 +69,7 @@ def __init__( Dictionary with variable names as keys and metadata as values, by default None """ - self.scaling_config = scaling_config - self.data_indices = data_indices + super().__init__(scaling_config, data_indices) self.variable_groups = self.scaling_config.variable_groups # turn dictionary around self.group_variables = {} @@ -66,11 +87,6 @@ def __init__( self.default_group, ) - @abstractmethod - def get_variable_scaling(self) -> np.ndarray: - """Get the scaling of the variables.""" - ... - def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: """Get the group of a variable. 
@@ -97,7 +113,7 @@ def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: class GeneralVariableLossScaler(BaseVariableLossScaler): """Scaling per variable defined in config file.""" - def get_variable_scaling(self) -> np.ndarray: + def get_scaling(self) -> np.ndarray: variable_loss_scaling = ( np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32) * self.scaling_config.default ) @@ -121,7 +137,7 @@ def get_variable_scaling(self) -> np.ndarray: class BaseVariableLevelScaler(BaseVariableLossScaler): - """Configurable method converting variable level to scaling.""" + """Configurable method converting variable level to loss scalings.""" def __init__( self, @@ -161,7 +177,7 @@ def __init__( @abstractmethod def get_level_scaling(self, variable_level: int) -> float: ... - def get_variable_scaling(self) -> np.ndarray: + def get_scaling(self) -> np.ndarray: variable_level_scaling = np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32) LOGGER.info( @@ -277,7 +293,7 @@ def __init__( @abstractmethod def get_level_scaling(self, variable_level: int) -> float: ... - def get_variable_scaling(self) -> np.ndarray: + def get_scaling(self) -> np.ndarray: variable_level_scaling = np.ones((len(self.data_indices.internal_data.output.full),), dtype=np.float32) LOGGER.info("Variable Level Scaling: Applying %s scaling to prognostic variables", self.name) diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index b14560ce..aef906e8 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -228,7 +228,7 @@ def test_variable_loss_scaling_vals( variable_scaling = GeneralVariableLossScaler( config.training.variable_loss_scaling, data_indices, - ).get_variable_scaling() + ).get_scaling() scalar = [ ( @@ -255,7 +255,7 @@ def test_variable_loss_scaling_vals( } # add addtional user-defined scalars - [scalars.update({scale.name: (scale.scale_dim, scale.get_variable_scaling())}) for scale in scalar] + [scalars.update({scale.name: (scale.scale_dim, scale.get_scaling())}) for scale in scalar] keys_list = list(scalars.keys()) scalars[keys_list[0]][1] * scalars[keys_list[1]][1] assert torch.allclose(torch.tensor(scalars[keys_list[0]][1] * scalars[keys_list[1]][1]), expected_scaling) From a03d6ba72c1e82a7ad234f79a326c3b255a5e62b Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 13:45:34 +0000 Subject: [PATCH 31/38] type checking check after all imports --- training/src/anemoi/training/train/scaling.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 8bde19a2..0195816b 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -15,14 +15,15 @@ from abc import abstractmethod from typing import TYPE_CHECKING -if TYPE_CHECKING: - from omegaconf import DictConfig - from anemoi.models.data_indices.collection import IndexCollection - import numpy as np from anemoi.training.utils.variables_metadata import ExtractVariableGroupAndLevel +if TYPE_CHECKING: + from omegaconf import DictConfig + + from anemoi.models.data_indices.collection import IndexCollection + LOGGER = logging.getLogger(__name__) From aa7f558546f6d2e9fe89bc319799d9a55d70d7d4 Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 13:56:36 +0000 Subject: [PATCH 32/38] comment: explanation about variable groups in config 
file --- training/src/anemoi/training/config/training/default.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index 8944f8af..daa76866 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -110,6 +110,9 @@ lr: # Variable loss scaling # 'variable' must be included in `scalars` in the losses for this to be applied. variable_loss_scaling: + # Variable groups definition for scaling by variable level. + # The variable level scaling methods are defined under additional_scalars + # The default group is appended as prefix to the metric of all variables not asigned to a group. variable_groups: default: sfc pl: [q, t, u, v, w, z] From 9a8a4b9b15435e600deff2eef20f1c8e33c232e4 Mon Sep 17 00:00:00 2001 From: Mariana Clare Date: Wed, 22 Jan 2025 15:17:37 +0000 Subject: [PATCH 33/38] rm if statement for tendency scaler --- training/src/anemoi/training/train/forecaster.py | 6 ------ training/src/anemoi/training/train/scaling.py | 4 ++++ training/tests/train/test_loss_scaling.py | 6 ------ 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index 91bc35d6..d536402b 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -117,12 +117,6 @@ def __init__( data_indices=data_indices, statistics=statistics, statistics_tendencies=statistics_tendencies, - ) - if scalar_config["name"] == "tendency" - else instantiate( - scalar_config, - scaling_config=config.training.variable_loss_scaling, - data_indices=data_indices, metadata_variables=metadata["dataset"].get("variables_metadata"), ) ) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 0195816b..e771ebc8 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -57,6 +57,7 @@ def __init__( scaling_config: DictConfig, data_indices: IndexCollection, metadata_variables: dict | None = None, + **kwargs ) -> None: """Initialise Scaler. @@ -150,6 +151,7 @@ def __init__( slope: float, name: str, scale_dim: int, + **kwargs ) -> None: """Initialise variable level scaler. @@ -234,6 +236,7 @@ def __init__( slope: float = 0.0, name: str | None = None, scale_dim: int | None = None, + **kwargs ) -> None: """Initialise Scaler with constant scaling of 1.""" assert ( @@ -268,6 +271,7 @@ def __init__( statistics_tendencies: dict, name: str, scale_dim: int, + **kwargs ) -> None: """Initialise variable level scaler. 
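The if/else branch removed from forecaster.py above works because every scaler now accepts **kwargs and discards the arguments it does not need, so one uniform argument set can be passed to instantiate for all scaler classes. A toy sketch of that pattern, with class and argument names that are illustrative rather than the package's API:

class LevelScaler:
    def __init__(self, slope: float, **kwargs) -> None:
        del kwargs  # statistics etc. are irrelevant for this scaler
        self.slope = slope


class TendencyScaler:
    def __init__(self, statistics: dict, **kwargs) -> None:
        del kwargs  # slope etc. are irrelevant here
        self.statistics = statistics


# one argument set serves both classes, no per-class branching needed
common = {"slope": 0.001, "statistics": {"stdev": [1.0, 2.0]}}
scalers = [LevelScaler(**common), TendencyScaler(**common)]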
diff --git a/training/tests/train/test_loss_scaling.py b/training/tests/train/test_loss_scaling.py index aef906e8..a6591a58 100644 --- a/training/tests/train/test_loss_scaling.py +++ b/training/tests/train/test_loss_scaling.py @@ -238,12 +238,6 @@ def test_variable_loss_scaling_vals( data_indices=data_indices, statistics=statistics, statistics_tendencies=statistics_tendencies, - ) - if scalar_config["name"] == "tendency" - else instantiate( - scalar_config, - scaling_config=config.training.variable_loss_scaling, - data_indices=data_indices, metadata_variables=None, ) ) From 66d66ed58eec60da91b0e8ae0adc8b8283069141 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:19:23 +0000 Subject: [PATCH 34/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- training/src/anemoi/training/train/scaling.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index e771ebc8..e7df3f7f 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -57,7 +57,7 @@ def __init__( scaling_config: DictConfig, data_indices: IndexCollection, metadata_variables: dict | None = None, - **kwargs + **kwargs, ) -> None: """Initialise Scaler. @@ -151,7 +151,7 @@ def __init__( slope: float, name: str, scale_dim: int, - **kwargs + **kwargs, ) -> None: """Initialise variable level scaler. @@ -236,7 +236,7 @@ def __init__( slope: float = 0.0, name: str | None = None, scale_dim: int | None = None, - **kwargs + **kwargs, ) -> None: """Initialise Scaler with constant scaling of 1.""" assert ( @@ -271,7 +271,7 @@ def __init__( statistics_tendencies: dict, name: str, scale_dim: int, - **kwargs + **kwargs, ) -> None: """Initialise variable level scaler. From db05ce5d9b805ed79724c670794c4bb384abbd6e Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 15:56:07 +0000 Subject: [PATCH 35/38] use utils function to retrieve variable group and reference for validation metrics as well --- .../src/anemoi/training/train/forecaster.py | 70 +++++++++++-------- training/src/anemoi/training/train/scaling.py | 11 +-- .../training/utils/variables_metadata.py | 22 +++--- 3 files changed, 56 insertions(+), 47 deletions(-) diff --git a/training/src/anemoi/training/train/forecaster.py b/training/src/anemoi/training/train/forecaster.py index 91bc35d6..79fa0bda 100644 --- a/training/src/anemoi/training/train/forecaster.py +++ b/training/src/anemoi/training/train/forecaster.py @@ -6,14 +6,11 @@ # In applying this licence, ECMWF does not waive the privileges and immunities # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
- +from __future__ import annotations import logging from collections import defaultdict -from collections.abc import Generator -from collections.abc import Mapping -from typing import Optional -from typing import Union +from typing import TYPE_CHECKING import pytorch_lightning as pl import torch @@ -21,12 +18,9 @@ from omegaconf import DictConfig from omegaconf import OmegaConf from timm.scheduler import CosineLRScheduler -from torch.distributed.distributed_c10d import ProcessGroup from torch.distributed.optim import ZeroRedundancyOptimizer from torch.utils.checkpoint import checkpoint -from torch_geometric.data import HeteroData -from anemoi.models.data_indices.collection import IndexCollection from anemoi.models.interface import AnemoiModelInterface from anemoi.training.losses.utils import grad_scaler from anemoi.training.losses.weightedloss import BaseWeightedLoss @@ -34,8 +28,18 @@ from anemoi.training.utils.jsonify import map_config_to_primitives from anemoi.training.utils.masks import Boolean1DMask from anemoi.training.utils.masks import NoOutputMask +from anemoi.training.utils.variables_metadata import ExtractVariableGroupAndLevel from anemoi.utils.config import DotDict +if TYPE_CHECKING: + from collections.abc import Generator + from collections.abc import Mapping + + from torch.distributed.distributed_c10d import ProcessGroup + from torch_geometric.data import HeteroData + + from anemoi.models.data_indices.collection import IndexCollection + LOGGER = logging.getLogger(__name__) @@ -129,7 +133,11 @@ def __init__( for scalar_config in config_container ] - self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices) + self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges( + config, + data_indices, + metadata["dataset"].get("variables_metadata"), + ) # Check if the model is a stretched grid if graph_data["hidden"].node_type == "StretchedTriNodes": @@ -207,9 +215,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: @staticmethod def get_loss_function( config: DictConfig, - scalars: Union[dict[str, tuple[Union[int, tuple[int, ...], torch.Tensor]]], None] = None, # noqa: FA100 + scalars: dict[str, tuple[int | tuple[int, ...] | torch.Tensor]] | None = None, **kwargs, - ) -> Union[BaseWeightedLoss, torch.nn.ModuleList]: # noqa: FA100 + ) -> BaseWeightedLoss | torch.nn.ModuleList: """Get loss functions from config. Can be ModuleList if multiple losses are specified. @@ -218,7 +226,7 @@ def get_loss_function( ---------- config : DictConfig Loss function configuration, should include `scalars` if scalars are to be added to the loss function. - scalars : Union[dict[str, tuple[Union[int, tuple[int, ...], torch.Tensor]]], None], optional + scalars : dict[str, tuple[int | tuple[int, ...] | torch.Tensor]], optional Scalars which can be added to the loss function. Defaults to None., by default None If a scalar is to be added to the loss, ensure it is in `scalars` in the loss config E.g. 
@@ -293,18 +301,26 @@ def training_weights_for_imputed_variables( self.updated_loss_mask = True @staticmethod - def get_val_metric_ranges(config: DictConfig, data_indices: IndexCollection) -> tuple[dict, dict]: + def get_val_metric_ranges( + config: DictConfig, + data_indices: IndexCollection, + metadata_variables: dict | None = None, + ) -> tuple[dict, dict]: metric_ranges = defaultdict(list) metric_ranges_validation = defaultdict(list) + variable_groups = config.training.variable_loss_scaling.variable_groups + + extract_variable_group_and_level = ExtractVariableGroupAndLevel( + variable_groups, + metadata_variables, + ) for key, idx in data_indices.internal_model.output.name_to_index.items(): - split = key.split("_") - if len(split) > 1 and split[-1].isdigit(): - # Group metrics for pressure levels (e.g., Q, T, U, V, etc.) - metric_ranges[f"pl_{split[0]}"].append(idx) - else: - metric_ranges[f"sfc_{key}"].append(idx) + variable_group, variable_ref, _ = extract_variable_group_and_level.get_group_and_level(key) + + # Add metrics for grouped variables and variables in default group + metric_ranges[f"{variable_group}_{variable_ref}"].append(idx) # Specific metrics from hydra to log in logger if key in config.training.metrics: @@ -315,13 +331,11 @@ def get_val_metric_ranges(config: DictConfig, data_indices: IndexCollection) -> # metric for validation, after postprocessing for key, idx in data_indices.model.output.name_to_index.items(): - # Split pressure levels on "_" separator - split = key.split("_") - if len(split) > 1 and split[1].isdigit(): - # Create grouped metrics for pressure levels (e.g. Q, T, U, V, etc.) for logger - metric_ranges_validation[f"pl_{split[0]}"].append(idx) - else: - metric_ranges_validation[f"sfc_{key}"].append(idx) + variable_group, variable_ref, _ = extract_variable_group_and_level.get_group_and_level(key) + + # Add metrics for grouped variables and variables in default group + metric_ranges_validation[f"{variable_group}_{variable_ref}"].append(idx) + # Create specific metrics from hydra to log in logger if key in config.training.metrics: metric_ranges_validation[key] = [idx] @@ -392,10 +406,10 @@ def advance_input( def rollout_step( self, batch: torch.Tensor, - rollout: Optional[int] = None, # noqa: FA100 + rollout: int | None = None, training_mode: bool = True, validation_mode: bool = False, - ) -> Generator[tuple[Union[torch.Tensor, None], dict, list], None, None]: # noqa: FA100 + ) -> Generator[tuple[torch.Tensor | None, dict, list], None, None]: """Rollout step for the forecaster. Will run pre_processors on batch, but not post_processors on predictions. 
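With get_val_metric_ranges now delegating to ExtractVariableGroupAndLevel, the metric keys follow the config-defined variable groups instead of hard-coded name splitting. A compact illustration of the resulting grouping, with names and indices invented for the example:

from collections import defaultdict

variable_groups = {"default": "sfc", "pl": ["q", "t"]}
name_to_index = {"q_850": 0, "t_500": 1, "tp": 2}

# invert the group mapping, as the utils class does internally
group_of = {v: g for g, vs in variable_groups.items() if g != "default" for v in ([vs] if isinstance(vs, str) else vs)}

metric_ranges = defaultdict(list)
for name, idx in name_to_index.items():
    ref = name.rsplit("_", 1)[0] if name.rsplit("_", 1)[-1].isdigit() else name  # name-splitting fallback path
    metric_ranges[f"{group_of.get(ref, variable_groups['default'])}_{ref}"].append(idx)

assert dict(metric_ranges) == {"pl_q": [0], "pl_t": [1], "sfc_tp": [2]}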
diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index 0195816b..56246180 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -72,20 +72,11 @@ def __init__( """ super().__init__(scaling_config, data_indices) self.variable_groups = self.scaling_config.variable_groups - # turn dictionary around - self.group_variables = {} - for group, variables in self.variable_groups.items(): - if isinstance(variables, str): - variables = [variables] - for variable in variables: - self.group_variables[variable] = group - self.default_group = self.scaling_config.variable_groups.default self.metadata_variables = metadata_variables self.extract_variable_group_and_level = ExtractVariableGroupAndLevel( - self.group_variables, + self.variable_groups, self.metadata_variables, - self.default_group, ) def get_variable_group(self, variable_name: str) -> tuple[str, str, int]: diff --git a/training/src/anemoi/training/utils/variables_metadata.py b/training/src/anemoi/training/utils/variables_metadata.py index 71648016..14b5487d 100644 --- a/training/src/anemoi/training/utils/variables_metadata.py +++ b/training/src/anemoi/training/utils/variables_metadata.py @@ -18,24 +18,28 @@ class ExtractVariableGroupAndLevel: Parameters ---------- - group_variables : dict - Dictionary with variable names as keys and groups as values. + variable_groups : dict + Dictionary with groups as keys and variable names as values metadata_variables : dict, optional Dictionary with variable names as keys and metadata as values, by default None - default_group : str, optional - Default group to return if the variable is not found in the group_variables dictionary, by default "sfc" - """ def __init__( self, - group_variables: dict, + variable_groups: dict, metadata_variables: dict | None = None, - default_group: str = "sfc", ) -> None: - self.group_variables = group_variables + self.variable_groups = variable_groups + # turn dictionary around + self.group_variables = {} + for group, variables in self.variable_groups.items(): + if isinstance(variables, str): + variables = [variables] + for variable in variables: + self.group_variables[variable] = group + assert "default" in self.variable_groups, "Default group not defined in variable_groups" + self.default_group = self.variable_groups["default"] self.metadata_variables = metadata_variables - self.default_group = default_group def get_group_and_level(self, variable_name: str) -> tuple[str, str, int]: """Get the group and level of a variable. From 3adf924f7884060b3c186e0ebdebabfe6da64076 Mon Sep 17 00:00:00 2001 From: sahahner Date: Wed, 22 Jan 2025 16:10:06 +0000 Subject: [PATCH 36/38] comment in config file that scaler name needs to be added to loss as well. --- training/src/anemoi/training/config/training/default.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index daa76866..fd673c73 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -129,6 +129,7 @@ variable_loss_scaling: 2d: 0.5 tp: 0.025 cp: 0.0025 +# Several additional scalars can be added here. In order to be applied their names must be included in the loss.
additional_scalars: # pressure level scalar - _target_: anemoi.training.train.scaling.ReluVariableLevelScaler From f19d69dc8125ea4c5099ee6b8f2d594cd2ab9651 Mon Sep 17 00:00:00 2001 From: Mariana Clare Date: Wed, 22 Jan 2025 16:47:16 +0000 Subject: [PATCH 37/38] fix pre-commit hooks --- training/src/anemoi/training/train/scaling.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/training/src/anemoi/training/train/scaling.py b/training/src/anemoi/training/train/scaling.py index e771ebc8..c3488a8b 100644 --- a/training/src/anemoi/training/train/scaling.py +++ b/training/src/anemoi/training/train/scaling.py @@ -72,6 +72,7 @@ def __init__( """ super().__init__(scaling_config, data_indices) + del kwargs self.variable_groups = self.scaling_config.variable_groups # turn dictionary around self.group_variables = {} @@ -171,6 +172,7 @@ def __init__( Slope of scaling function. """ super().__init__(scaling_config, data_indices, metadata_variables) + del kwargs self.scaling_group = group self.y_intercept = y_intercept self.slope = slope @@ -239,6 +241,7 @@ def __init__( **kwargs ) -> None: """Initialise Scaler with constant scaling of 1.""" + del kwargs assert ( y_intercept == 1.0 and slope == 0 ), "self.y_intercept must be 1.0 and self.slope 0.0 for no scaling to fit with definition of linear function." @@ -287,6 +290,7 @@ def __init__( Data statistics dictionary for tendencies """ super().__init__(scaling_config, data_indices) + del kwargs self.statistics = statistics self.statistics_tendencies = statistics_tendencies self.name = name From 00439cb5cbb881f29dd1a8ae962da471b0de244e Mon Sep 17 00:00:00 2001 From: Mariana Clare <31656450+mc4117@users.noreply.github.com> Date: Fri, 24 Jan 2025 16:53:06 +0100 Subject: [PATCH 38/38] Update description in training/default --- training/src/anemoi/training/config/training/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/src/anemoi/training/config/training/default.yaml b/training/src/anemoi/training/config/training/default.yaml index fd673c73..ce74f098 100644 --- a/training/src/anemoi/training/config/training/default.yaml +++ b/training/src/anemoi/training/config/training/default.yaml @@ -112,7 +112,7 @@ lr: variable_loss_scaling: # Variable groups definition for scaling by variable level. # The variable level scaling methods are defined under additional_scalars - # The default group is appended as prefix to the metric of all variables not asigned to a group. + # A default group is required and is appended as prefix to the metric of all variables not assigned to a group. variable_groups: default: sfc pl: [q, t, u, v, w, z]
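Taken together, the refactor makes each variable's loss scaling the product of independently registered scalars. A self-contained numeric sketch of that composition, using the fixture values from test_loss_scaling.py above (variable order y_50, y_500, y_850, q, z, other, cos_d, sin_d; the dict mirrors the scalars registry, and the product is what the test asserts):

import numpy as np

variable_scaling = np.array([0.5, 0.5, 0.5, 1.0, 0.1, 100.0, 1.0, 1.0])  # GeneralVariableLossScaler result
level_scaling = np.array([0.05, 0.5, 0.85, 1.0, 1.0, 1.0, 1.0, 1.0])  # LinearVariableLevelScaler: plev / 1000, non-pl variables stay 1

scalars = {"variable": (-1, variable_scaling), "variable_pressure_level": (-1, level_scaling)}
combined = np.prod([values for _, values in scalars.values()], axis=0)

assert np.allclose(combined, [0.025, 0.25, 0.425, 1.0, 0.1, 100.0, 1.0, 1.0])  # expected_linear_scaling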