From 1046ef218d9cd85a03af98018dcfd21d99dec45e Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Thu, 27 Jun 2024 14:32:09 +0100 Subject: [PATCH 01/13] add config --- pyproject.toml | 2 +- src/anemoi/training/commands/train.py | 65 ++++++++++++++++++++++++++ src/anemoi/training/config/__init__.py | 0 src/anemoi/training/config/config.yaml | 11 +++++ 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 src/anemoi/training/commands/train.py create mode 100644 src/anemoi/training/config/__init__.py create mode 100644 src/anemoi/training/config/config.yaml diff --git a/pyproject.toml b/pyproject.toml index 12a950ff..41f0a68e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ dynamic = [ ] dependencies = [ "anemoi-datasets[data]>=0.1", - "anemoi-models @ git+https://github.com/ecmwf/anemoi-models.git", + "anemoi-models", "anemoi-utils[provenance]>=0.1.3", "einops>=0.6.1", "hydra-core>=1.3", diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py new file mode 100644 index 00000000..d3ec5d42 --- /dev/null +++ b/src/anemoi/training/commands/train.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + + +import json +import sys + +import hydra +from anemoi.utils.config import load_raw_config +from omegaconf import OmegaConf + +from . 
import Command + + +class Train(Command): + + def add_arguments(self, command_parser): + print("aaa") + command_parser.add_argument("--main", action="store_true", help="Run the main function") + command_parser.add_argument("--config", nargs="*", type=str, help="A list of extra config files to load") + + def run(self, args): + # Just a proof of concept + if args.main: + + @hydra.main(config_path="../config", config_name="config") + def hydra_main(cfg): + print(dir(cfg)) + print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) + + del sys.argv[1] # train + del sys.argv[1] # --main + hydra_main() + exit(0) + + hydra.initialize(config_path="../config") + + cfg = hydra.compose(config_name="config") + + # Add project config + # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) + + # Add experiment config + # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) + + # Add user config + cfg = OmegaConf.merge(cfg, OmegaConf.create(load_raw_config("training.yaml", default={}))) + + # Add extra config files specified in the command line + if args.config: + for config in args.config: + print(f"Loading config {config}") + cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) + + print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) + + +command = Train diff --git a/src/anemoi/training/config/__init__.py b/src/anemoi/training/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/anemoi/training/config/config.yaml b/src/anemoi/training/config/config.yaml new file mode 100644 index 00000000..32a64a88 --- /dev/null +++ b/src/anemoi/training/config/config.yaml @@ -0,0 +1,11 @@ +defaults: + - _self_ + +model: + num_channels: 128 +dataloader: + limit_batches: + training: 100 + validation: 100 +training: + max_epochs: 3 From 849c20b6bfe685054bfb6ef2cddc1c52ff2a449b Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Thu, 27 Jun 2024 18:04:38 +0100 Subject: [PATCH 02/13] add hydra overrides --- src/anemoi/training/commands/train.py 
| 54 ++++++++++++++------------ src/anemoi/training/config/config.yaml | 3 ++ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index d3ec5d42..fec2f21e 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -10,7 +10,6 @@ import json -import sys import hydra from anemoi.utils.config import load_raw_config @@ -21,28 +20,22 @@ class Train(Command): - def add_arguments(self, command_parser): - print("aaa") - command_parser.add_argument("--main", action="store_true", help="Run the main function") - command_parser.add_argument("--config", nargs="*", type=str, help="A list of extra config files to load") - - def run(self, args): - # Just a proof of concept - if args.main: + accept_unknown_args = True - @hydra.main(config_path="../config", config_name="config") - def hydra_main(cfg): - print(dir(cfg)) - print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) + def add_arguments(self, command_parser): + command_parser.add_argument( + "--config", + action="append", + type=str, + help="A list of extra config files to load", + default=[], + ) - del sys.argv[1] # train - del sys.argv[1] # --main - hydra_main() - exit(0) + def run(self, args, overrides=[]): - hydra.initialize(config_path="../config") + hydra.initialize(config_path="../config", version_base="1.1") - cfg = hydra.compose(config_name="config") + cfg = hydra.compose(config_name="config", overrides=overrides) # Add project config # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) @@ -51,15 +44,28 @@ def hydra_main(cfg): # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) # Add user config - cfg = OmegaConf.merge(cfg, OmegaConf.create(load_raw_config("training.yaml", default={}))) + cfg = OmegaConf.merge( + cfg, + OmegaConf.create( + load_raw_config( + "training.yaml", + default={}, + ) + ), + ) # Add extra config files specified in the command line - if args.config: - 
for config in args.config: - print(f"Loading config {config}") - cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) + + for config in args.config: + print(f"Loading config {config}") + cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) + + # We need to reapply the overrides + cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(overrides)) print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) + # AIFSTrainer(cfg).train() + command = Train diff --git a/src/anemoi/training/config/config.yaml b/src/anemoi/training/config/config.yaml index 32a64a88..7d0c7f22 100644 --- a/src/anemoi/training/config/config.yaml +++ b/src/anemoi/training/config/config.yaml @@ -9,3 +9,6 @@ dataloader: validation: 100 training: max_epochs: 3 + +token: + mflow: None From 4d0207191d43f462d272b3c66774eef372706bbf Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Thu, 27 Jun 2024 19:52:15 +0100 Subject: [PATCH 03/13] better arg parsing --- src/anemoi/training/commands/train.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index fec2f21e..de4b6121 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -10,6 +10,7 @@ import json +import logging import hydra from anemoi.utils.config import load_raw_config @@ -17,10 +18,10 @@ from . 
import Command +LOGGER = logging.getLogger(__name__) -class Train(Command): - accept_unknown_args = True +class Train(Command): def add_arguments(self, command_parser): command_parser.add_argument( @@ -30,12 +31,13 @@ def add_arguments(self, command_parser): help="A list of extra config files to load", default=[], ) + command_parser.add_argument("overrides", nargs="*", type=str, help="A list of overrides to apply") - def run(self, args, overrides=[]): + def run(self, args): - hydra.initialize(config_path="../config", version_base="1.1") + hydra.initialize(config_path="../config", version_base=None) - cfg = hydra.compose(config_name="config", overrides=overrides) + cfg = hydra.compose(config_name="config", overrides=args.overrides) # Add project config # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) @@ -57,11 +59,11 @@ def run(self, args, overrides=[]): # Add extra config files specified in the command line for config in args.config: - print(f"Loading config {config}") + LOGGER.info(f"Loading config {config}") cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) # We need to reapply the overrides - cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(overrides)) + cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(args.overrides)) print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) From e5632914ea29daaf80d7610126ef0c8b4e46527e Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 08:43:15 +0100 Subject: [PATCH 04/13] better support of overrides --- src/anemoi/training/commands/train.py | 64 +++++++++++++++++---------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index de4b6121..0ea79803 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -11,59 +11,75 @@ import json import logging +import os +import re import hydra -from anemoi.utils.config import load_raw_config +from anemoi.utils.config 
import config_path from omegaconf import OmegaConf from . import Command LOGGER = logging.getLogger(__name__) +# https://hydra.cc/docs/advanced/override_grammar/basic/ + +override_regex = re.compile( + r""" + ^ + ( + (~|\+|\+\+)? # optional prefix + (\w+)([/@:\.]\w+)* # key + = # assignment + (.*) # value + ) + | # or + (~ # ~ prefix + (\w+)([/@:\.]\w+) # key + ) + $ + """, + re.VERBOSE, +) + class Train(Command): def add_arguments(self, command_parser): command_parser.add_argument( - "--config", - action="append", - type=str, - help="A list of extra config files to load", - default=[], + "config", nargs="*", type=str, help="A list yaml files to load or a list of overrides to apply" ) - command_parser.add_argument("overrides", nargs="*", type=str, help="A list of overrides to apply") def run(self, args): - hydra.initialize(config_path="../config", version_base=None) + configs = [] + overrides = [] - cfg = hydra.compose(config_name="config", overrides=args.overrides) + for config in args.config: + if override_regex.match(config): + overrides.append(config) + else: + configs.append(config) - # Add project config - # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) + hydra.initialize(config_path="../config", version_base=None) - # Add experiment config - # cfg = OmegaConf.merge(cfg, OmegaConf.create(...)) + cfg = hydra.compose(config_name="config", overrides=overrides) # Add user config - cfg = OmegaConf.merge( - cfg, - OmegaConf.create( - load_raw_config( - "training.yaml", - default={}, - ) - ), - ) + user_config = config_path("training.yaml") + + if os.path.exists(user_config): + LOGGER.info(f"Loading config {user_config}") + cfg = OmegaConf.merge(cfg, OmegaConf.load(user_config)) # Add extra config files specified in the command line - for config in args.config: + for config in configs: LOGGER.info(f"Loading config {config}") cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) # We need to reapply the overrides - cfg = OmegaConf.merge(cfg, 
OmegaConf.from_dotlist(args.overrides)) + cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(overrides)) print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) From 25e552ebc61bf73bcbff6ef2376ca96d122a1c96 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 08:43:41 +0100 Subject: [PATCH 05/13] better support of overrides --- src/anemoi/training/commands/train.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index 0ea79803..07ea078a 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -47,7 +47,10 @@ class Train(Command): def add_arguments(self, command_parser): command_parser.add_argument( - "config", nargs="*", type=str, help="A list yaml files to load or a list of overrides to apply" + "config", + nargs="*", + type=str, + help="A list yaml files to load or a list of overrides to apply", ) def run(self, args): From ba949f7ab89827a9bf4643110a58d2eefbd63891 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 10:44:34 +0100 Subject: [PATCH 06/13] Better check of config arguments --- src/anemoi/training/commands/train.py | 8 ++++++-- src/anemoi/training/config/config.yaml | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index 07ea078a..3b44434e 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -61,12 +61,15 @@ def run(self, args): for config in args.config: if override_regex.match(config): overrides.append(config) - else: + elif config.endswith(".yaml") or config.endswith(".yml"): configs.append(config) + else: + raise ValueError(f"Invalid config '{config}'. 
It must be a yaml file or an override") hydra.initialize(config_path="../config", version_base=None) - cfg = hydra.compose(config_name="config", overrides=overrides) + cfg = hydra.compose(config_name="config") # , overrides=overrides) + print(cfg) # Add user config user_config = config_path("training.yaml") @@ -82,6 +85,7 @@ def run(self, args): cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) # We need to reapply the overrides + # This does not support overrides with a prefix cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(overrides)) print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) diff --git a/src/anemoi/training/config/config.yaml b/src/anemoi/training/config/config.yaml index 7d0c7f22..604b5f20 100644 --- a/src/anemoi/training/config/config.yaml +++ b/src/anemoi/training/config/config.yaml @@ -1,14 +1,16 @@ defaults: - - _self_ +- _self_ model: num_channels: 128 + dataloader: limit_batches: training: 100 validation: 100 + training: max_epochs: 3 token: - mflow: None + mlflow: null From c7718df9d869db12f05a681c2f4f37e4c42e53e2 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 11:57:06 +0100 Subject: [PATCH 07/13] Add documentation --- docs/cli/introduction.rst | 24 ++++++++++++++++ docs/cli/train.rst | 41 +++++++++++++++++++++++++++ docs/cli/train.yaml | 2 ++ docs/index.rst | 13 +++++++++ src/anemoi/training/commands/train.py | 2 +- 5 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 docs/cli/introduction.rst create mode 100644 docs/cli/train.rst create mode 100644 docs/cli/train.yaml diff --git a/docs/cli/introduction.rst b/docs/cli/introduction.rst new file mode 100644 index 00000000..045c3303 --- /dev/null +++ b/docs/cli/introduction.rst @@ -0,0 +1,24 @@ +Introduction +============ + +When you install the `anemoi-training` package, this will also install command line tool +called ``anemoi-training`` which can be used to train models. + +The tool can provide help with the ``--help`` options: + +.. 
code-block:: bash + + % anemoi-training --help + +The commands are: + +.. toctree:: + :maxdepth: 1 + + train + +.. argparse:: + :module: anemoi.training.__main__ + :func: create_parser + :prog: anemoi-training + :nosubcommands: diff --git a/docs/cli/train.rst b/docs/cli/train.rst new file mode 100644 index 00000000..391f6227 --- /dev/null +++ b/docs/cli/train.rst @@ -0,0 +1,41 @@ +train +====== + +Use this command to create a train a model: + +.. code-block:: bash + + % anemoi-training train config.yaml + +The command will read the default configuration and override it with the values in the provided configuration file. +The configuration file should be a YAML file with the structure defined in the `Configuration` section. The file `config.yaml` will typically destribes +the model to be trained, the dataset to be used, and the training hyperparameters: + +.. literalinclude:: train.yaml + :language: yaml + +You can provide more that one configuration file, in which case the values will be merged in the order they are provided. A typical usage would be +to split the training configurations into model description, training hyperparameters and runtime options + +.. code-block:: bash + + % anemoi-training train model.yaml hyperparameters.yaml slurm.yaml + +Furthermore, you can also provide values directly on the command line, which will override any values in the configuration files: + +.. code-block:: bash + + % anemoi-training train config.yaml tracker.mlflow.tracking_uri=http://localhost:5000 + +If the file `~/.config/anemoi/train.yaml` exists, it will be loaded after the defaults and before any other configuration file. +This allows you to provide values such as passwords or other sensitive information that you do not want to store a git repository. + +********************* + Command line usage +********************* + +.. 
argparse:: + :module: anemoi.training.__main__ + :func: create_parser + :prog: anemoi-training + :path: train diff --git a/docs/cli/train.yaml b/docs/cli/train.yaml new file mode 100644 index 00000000..0d0f990d --- /dev/null +++ b/docs/cli/train.yaml @@ -0,0 +1,2 @@ +training: + max_epochs: 10 diff --git a/docs/index.rst b/docs/index.rst index d0b37514..4ede822a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,19 @@ of the *Anemoi* packages. installing +**Command line tool** + +- :doc:`cli/introduction` +- :doc:`cli/train` + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Command line tool + + cli/introduction + cli/train + ***************** Anemoi packages ***************** diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index 3b44434e..f5fe1fbf 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -72,7 +72,7 @@ def run(self, args): print(cfg) # Add user config - user_config = config_path("training.yaml") + user_config = config_path("train.yaml") if os.path.exists(user_config): LOGGER.info(f"Loading config {user_config}") From 6cecb1495ef84aca5af0cf6e3f4659c53ab2b4e0 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 11:59:08 +0100 Subject: [PATCH 08/13] Add documentation --- docs/cli/train.rst | 54 ++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/docs/cli/train.rst b/docs/cli/train.rst index 391f6227..0e745bc2 100644 --- a/docs/cli/train.rst +++ b/docs/cli/train.rst @@ -1,41 +1,49 @@ -train -====== +####### + train +####### Use this command to create a train a model: -.. code-block:: bash +.. code:: bash - % anemoi-training train config.yaml + % anemoi-training train config.yaml -The command will read the default configuration and override it with the values in the provided configuration file. 
-The configuration file should be a YAML file with the structure defined in the `Configuration` section. The file `config.yaml` will typically destribes -the model to be trained, the dataset to be used, and the training hyperparameters: +The command will read the default configuration and override it with the +values in the provided configuration file. The configuration file should +be a YAML file with the structure defined in the `Configuration` +section. The file `config.yaml` will typically describe the model to be +trained, the dataset to be used, and the training hyperparameters: .. literalinclude:: train.yaml - :language: yaml + :language: yaml -You can provide more that one configuration file, in which case the values will be merged in the order they are provided. A typical usage would be -to split the training configurations into model description, training hyperparameters and runtime options +You can provide more than one configuration file, in which case the +values will be merged in the order they are provided. A typical usage +would be to split the training configurations into model description, +training hyperparameters and runtime options: -.. code-block:: bash +.. code:: bash - % anemoi-training train model.yaml hyperparameters.yaml slurm.yaml + % anemoi-training train model.yaml hyperparameters.yaml slurm.yaml -Furthermore, you can also provide values directly on the command line, which will override any values in the configuration files: +Furthermore, you can also provide values directly on the command line, +which will override any values in the configuration files: -.. code-block:: bash +.. code:: bash - % anemoi-training train config.yaml tracker.mlflow.tracking_uri=http://localhost:5000 + % anemoi-training train config.yaml tracker.mlflow.tracking_uri=http://localhost:5000 -If the file `~/.config/anemoi/train.yaml` exists, it will be loaded after the defaults and before any other configuration file. 
-This allows you to provide values such as passwords or other sensitive information that you do not want to store a git repository. +If the file `~/.config/anemoi/training.yaml` exists, it will be loaded +after the defaults and before any other configuration file. This allows +you to provide values such as passwords or other sensitive information +that you do not want to store in a git repository. -********************* +******************** Command line usage -********************* +******************** .. argparse:: - :module: anemoi.training.__main__ - :func: create_parser - :prog: anemoi-training - :path: train + :module: anemoi.training.__main__ + :func: create_parser + :prog: anemoi-training + :path: train From 9e9c1b53f862b85bd357cf0ce30d3aad43147e3d Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 16:55:38 +0100 Subject: [PATCH 09/13] Better implementation of overrides --- src/anemoi/training/commands/train.py | 87 ++++++++++++++++++++++++-- src/anemoi/training/config/config.yaml | 2 +- 2 files changed, 82 insertions(+), 7 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index f5fe1fbf..9b9c703a 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -16,6 +16,7 @@ import hydra from anemoi.utils.config import config_path +from hydra.errors import ConfigCompositionException from omegaconf import OmegaConf from . 
import Command @@ -43,6 +44,76 @@ ) +def apply_delete_override(cfg, dotkey, value, parent, key, value_given): + + any_value = object() + + if value_given: + value_given = value + else: + value_given = any_value + + value = OmegaConf.select(cfg, dotkey, throw_on_missing=False) + if value_given is not any_value and value != value_given: + raise ConfigCompositionException( + f"Key '{dotkey}' with value '{value}' does not match the value '{value_given}' in the override" + ) + + try: + # Allow 'del' + OmegaConf.set_struct(cfg, False) + if key is None: + # Top level key + del cfg[parent] + else: + subtree = OmegaConf.select(cfg, parent) + del subtree[key] + finally: + OmegaConf.set_struct(cfg, True) + + +def apply_add_override_force(cfg, dotkey, value, parent, key): + OmegaConf.update(cfg, dotkey, value, merge=True, force_add=True) + + +def apply_add_override(cfg, dotkey, value, parent, key): + current = OmegaConf.select(cfg, dotkey, throw_on_missing=False) + if current is not None: + raise ConfigCompositionException(f"Cannot add key '{dotkey}' because it already exists, use '++' to force add") + + OmegaConf.update(cfg, dotkey, value, merge=True, force_add=True) + + +def apply_assign_override(cfg, dotkey, value, parent, key): + OmegaConf.update(cfg, dotkey, value, merge=True) + + +def parse_override(override, n): + dotkey = override[n:] + parsed = OmegaConf.from_dotlist([dotkey]) + dotkey = dotkey.split("=")[0] + value = OmegaConf.select(parsed, dotkey) + + if "." 
in dotkey: + parent, key = dotkey.rsplit(".", 1) + return dotkey, value, parent, key + else: + return dotkey, value, dotkey, None + + +def apply_override(cfg, override): + if override.startswith("~"): + return apply_delete_override(cfg, *parse_override(override, 1), value_given="=" in override) + + if override.startswith("++"): + return apply_add_override_force(cfg, *parse_override(override, 2)) + + if override.startswith("+"): + return apply_add_override(cfg, *parse_override(override, 1)) + + return apply_assign_override(cfg, *parse_override(override, 0)) + + class Train(Command): def add_arguments(self, command_parser): @@ -68,15 +139,14 @@ def run(self, args): hydra.initialize(config_path="../config", version_base=None) - cfg = hydra.compose(config_name="config") # , overrides=overrides) - print(cfg) + cfg = hydra.compose(config_name="config") # Add user config user_config = config_path("train.yaml") if os.path.exists(user_config): LOGGER.info(f"Loading config {user_config}") - cfg = OmegaConf.merge(cfg, OmegaConf.load(user_config)) + cfg = OmegaConf.merge(cfg, OmegaConf.load(user_config, resolve=True)) # Add extra config files specified in the command line @@ -85,10 +155,15 @@ def run(self, args): cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) # We need to reapply the overrides - # This does not support overrides with a prefix - cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(overrides)) + # OmegaConf do not implement the prefix logic, this is done by hydra + for override in overrides: + LOGGER.info(f"Applying override {override}") + apply_override(cfg, override) + + # Resolve the config + OmegaConf.resolve(cfg) - print(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=4)) + print(json.dumps(OmegaConf.to_container(cfg), indent=4)) # AIFSTrainer(cfg).train() diff --git a/src/anemoi/training/config/config.yaml b/src/anemoi/training/config/config.yaml index 604b5f20..368ea589 100644 --- a/src/anemoi/training/config/config.yaml +++ 
b/src/anemoi/training/config/config.yaml @@ -13,4 +13,4 @@ training: max_epochs: 3 token: - mlflow: null + mlflow: 8 From d7dc7cbcbe810e91854fb8a561997b18f720d62d Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 17:04:58 +0100 Subject: [PATCH 10/13] Better implementation of overrides --- src/anemoi/training/commands/train.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index 9b9c703a..b34a56c5 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -48,15 +48,14 @@ def apply_delete_override(cfg, dotkey, value, parent, key, value_given): any_value = object() - if value_given: - value_given = value - else: - value_given = any_value + if not value_given: + assert value is None + value = any_value - value = OmegaConf.select(cfg, dotkey, throw_on_missing=False) - if value_given is not any_value and value != value_given: + current = OmegaConf.select(cfg, dotkey, throw_on_missing=False) + if value not in (any_value, current): raise ConfigCompositionException( - f"Key '{dotkey}' with value '{value}' does not match the value '{value_given}' in the override" + f"Key '{dotkey}' with value '{current}' does not match the value '{value}' in the override" ) try: From cca169870bea281e6890904e4b72f643b8a02133 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Fri, 28 Jun 2024 17:09:33 +0100 Subject: [PATCH 11/13] Bug fix in delete overrides --- src/anemoi/training/commands/train.py | 40 +++++++++++++-------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index b34a56c5..a2767732 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -29,14 +29,14 @@ r""" ^ ( - (~|\+|\+\+)? # optional prefix - (\w+)([/@:\.]\w+)* # key - = # assignment - (.*) # value + (~|\+|\+\+)? 
# optional prefix + (\w+)([/@:\.]\w+)* # key + = # assignment + (.*) # value ) - | # or - (~ # ~ prefix - (\w+)([/@:\.]\w+) # key + | # or + (~ # ~ prefix + (\w+)([/@:\.]\w+)* # key ) $ """, @@ -44,7 +44,7 @@ ) -def apply_delete_override(cfg, dotkey, value, parent, key, value_given): +def apply_delete_override(cfg, dotkey, value, value_given): any_value = object() @@ -61,21 +61,24 @@ def apply_delete_override(cfg, dotkey, value, parent, key, value_given): try: # Allow 'del' OmegaConf.set_struct(cfg, False) - if key is None: - # Top level key - del cfg[parent] - else: + + if "." in dotkey: + parent, key = dotkey.rsplit(".", 1) subtree = OmegaConf.select(cfg, parent) del subtree[key] + else: + # Top level key + del cfg[dotkey] + finally: OmegaConf.set_struct(cfg, True) -def apply_add_override_force(cfg, dotkey, value, parent, key): +def apply_add_override_force(cfg, dotkey, value): OmegaConf.update(cfg, dotkey, value, merge=True, force_add=True) -def apply_add_override(cfg, dotkey, value, parent, key): +def apply_add_override(cfg, dotkey, value): current = OmegaConf.select(cfg, dotkey, throw_on_missing=False) if current is not None: raise ConfigCompositionException(f"Cannot add key '{dotkey}' because it already exists, use '++' to force add") @@ -83,7 +86,7 @@ def apply_add_override(cfg, dotkey, value, parent, key): OmegaConf.update(cfg, dotkey, value, merge=True, force_add=True) -def apply_assign_override(cfg, dotkey, value, parent, key): +def apply_assign_override(cfg, dotkey, value): OmegaConf.update(cfg, dotkey, value, merge=True) @@ -92,12 +95,7 @@ def parse_override(override, n): parsed = OmegaConf.from_dotlist([dotkey]) dotkey = dotkey.split("=")[0] value = OmegaConf.select(parsed, dotkey) - - if "." 
in dotkey: - parent, key = dotkey.rsplit(".", 1) - return dotkey, value, parent, key - else: - return dotkey, value, dotkey, None + return dotkey, value def apply_override(cfg, override): From 4e91faf613d2caf7ee3d57df044c51e1b9376810 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Wed, 3 Jul 2024 09:58:02 +0100 Subject: [PATCH 12/13] bug fix --- src/anemoi/training/commands/train.py | 4 ++-- src/anemoi/training/config/config.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index a2767732..3406312e 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -139,11 +139,11 @@ def run(self, args): cfg = hydra.compose(config_name="config") # Add user config - user_config = config_path("train.yaml") + user_config = config_path("training.yaml") if os.path.exists(user_config): LOGGER.info(f"Loading config {user_config}") - cfg = OmegaConf.merge(cfg, OmegaConf.load(user_config, resolve=True)) + cfg = OmegaConf.merge(cfg, OmegaConf.load(user_config)) # Add extra config files specified in the command line diff --git a/src/anemoi/training/config/config.yaml b/src/anemoi/training/config/config.yaml index 368ea589..604b5f20 100644 --- a/src/anemoi/training/config/config.yaml +++ b/src/anemoi/training/config/config.yaml @@ -13,4 +13,4 @@ training: max_epochs: 3 token: - mlflow: 8 + mlflow: null From b9e10e5c2fdf694c9fa408259802268bd6346790 Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Sun, 7 Jul 2024 10:42:09 +0100 Subject: [PATCH 13/13] Simplify hydra overrides --- pyproject.toml | 4 +- src/anemoi/training/commands/train.py | 81 ++------------------------- 2 files changed, 6 insertions(+), 79 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 41f0a68e..6b817245 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,9 +50,9 @@ dynamic = [ "version", ] dependencies = [ - "anemoi-datasets[data]>=0.1", + 
"anemoi-datasets[data]>=0.4", "anemoi-models", - "anemoi-utils[provenance]>=0.1.3", + "anemoi-utils[provenance]>=0.3.10", "einops>=0.6.1", "hydra-core>=1.3", "matplotlib>=3.7.1", diff --git a/src/anemoi/training/commands/train.py b/src/anemoi/training/commands/train.py index a2767732..07d22a35 100644 --- a/src/anemoi/training/commands/train.py +++ b/src/anemoi/training/commands/train.py @@ -16,7 +16,6 @@ import hydra from anemoi.utils.config import config_path -from hydra.errors import ConfigCompositionException from omegaconf import OmegaConf from . import Command @@ -29,88 +28,16 @@ r""" ^ ( - (~|\+|\+\+)? # optional prefix (\w+)([/@:\.]\w+)* # key = # assignment (.*) # value ) - | # or - (~ # ~ prefix - (\w+)([/@:\.]\w+)* # key - ) $ """, re.VERBOSE, ) -def apply_delete_override(cfg, dotkey, value, value_given): - - any_value = object() - - if not value_given: - assert value is None - value = any_value - - current = OmegaConf.select(cfg, dotkey, throw_on_missing=False) - if value not in (any_value, current): - raise ConfigCompositionException( - f"Key '{dotkey}' with value '{current}' does not match the value '{value}' in the override" - ) - - try: - # Allow 'del' - OmegaConf.set_struct(cfg, False) - - if "." 
in dotkey: - parent, key = dotkey.rsplit(".", 1) - subtree = OmegaConf.select(cfg, parent) - del subtree[key] - else: - # Top level key - del cfg[dotkey] - - finally: - OmegaConf.set_struct(cfg, True) - - -def apply_add_override_force(cfg, dotkey, value): - OmegaConf.update(cfg, dotkey, value, merge=True, force_add=True) - - -def apply_add_override(cfg, dotkey, value): - current = OmegaConf.select(cfg, dotkey, throw_on_missing=False) - if current is not None: - raise ConfigCompositionException(f"Cannot add key '{dotkey}' because it already exists, use '++' to force add") - - OmegaConf.update(cfg, dotkey, value, merge=True, force_add=True) - - -def apply_assign_override(cfg, dotkey, value): - OmegaConf.update(cfg, dotkey, value, merge=True) - - -def parse_override(override, n): - dotkey = override[n:] - parsed = OmegaConf.from_dotlist([dotkey]) - dotkey = dotkey.split("=")[0] - value = OmegaConf.select(parsed, dotkey) - return dotkey, value - - -def apply_override(cfg, override): - if override.startswith("~"): - return apply_delete_override(cfg, *parse_override(override, 1), value_given="=" in override) - - if override.startswith("++"): - return apply_add_override_force(cfg, *parse_override(override, 2)) - - if override.startswith("+"): - return apply_add_override(cfg, *parse_override(override, 1)) - - return apply_assign_override(cfg, *parse_override(override, 0)) - - class Train(Command): def add_arguments(self, command_parser): @@ -134,6 +61,7 @@ def run(self, args): else: raise ValueError(f"Invalid config '{config}'. It must be a yaml file or an override") + # We could apply the overrides here. 
To be tested hydra.initialize(config_path="../config", version_base=None) cfg = hydra.compose(config_name="config") @@ -151,11 +79,10 @@ def run(self, args): LOGGER.info(f"Loading config {config}") cfg = OmegaConf.merge(cfg, OmegaConf.load(config)) - # We need to reapply the overrides + # Apply overrides # OmegaConf do not implement the prefix logic, this is done by hydra - for override in overrides: - LOGGER.info(f"Applying override {override}") - apply_override(cfg, override) + # If needed, the logic can be implemented here (look in the git history for an example) + OmegaConf.merge(cfg, OmegaConf.from_dotlist(overrides)) # Resolve the config OmegaConf.resolve(cfg)