From 67998b836345336dae4514786c027d3ac1d10347 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 27 Sep 2024 17:37:16 +0200 Subject: [PATCH] read projection from datastore config extra section --- README.md | 42 -------------- neural_lam/datastore/mdp.py | 55 ++++++++++++------- neural_lam/datastore/plot_example.py | 6 +- pyproject.toml | 2 +- .../datastore_examples/mdp/danra.example.yaml | 16 ++++-- 5 files changed, 51 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index e4a1989b..f23637c9 100644 --- a/README.md +++ b/README.md @@ -258,48 +258,6 @@ Except for training and pre-processing scripts all the source code can be found Model classes, including abstract base classes, are located in `neural_lam/models`. Notebooks for visualization and analysis are located in `docs`. - -## Format of data directory -It is possible to store multiple datasets in the `data` directory. -Each dataset contains a set of files with static features and a set of samples. -The samples are split into different sub-directories for training, validation and testing. -The directory structure is shown with examples below. -Script names within parenthesis denote the script used to generate the file. -``` -data -├── dataset1 -│ ├── samples - Directory with data samples -│ │ ├── train - Training data -│ │ │ ├── nwp_2022040100_mbr000.npy - A time series sample -│ │ │ ├── nwp_2022040100_mbr001.npy -│ │ │ ├── ... -│ │ │ ├── nwp_2022043012_mbr001.npy -│ │ │ ├── nwp_toa_downwelling_shortwave_flux_2022040100.npy - Solar flux forcing -│ │ │ ├── nwp_toa_downwelling_shortwave_flux_2022040112.npy -│ │ │ ├── ... -│ │ │ ├── nwp_toa_downwelling_shortwave_flux_2022043012.npy -│ │ │ ├── wtr_2022040100.npy - Open water features for one sample -│ │ │ ├── wtr_2022040112.npy -│ │ │ ├── ... 
-│ │ │ └── wtr_202204012.npy -│ │ ├── val - Validation data -│ │ └── test - Test data -│ └── static - Directory with graph information and static features -│ ├── nwp_xy.npy - Coordinates of grid nodes (part of dataset) -│ ├── surface_geopotential.npy - Geopotential at surface of grid nodes (part of dataset) -│ ├── border_mask.npy - Mask with True for grid nodes that are part of border (part of dataset) -│ ├── grid_features.pt - Static features of grid nodes (neural_lam.create_grid_features) -│ ├── parameter_mean.pt - Means of state parameters (neural_lam.create_parameter_weights) -│ ├── parameter_std.pt - Std.-dev. of state parameters (neural_lam.create_parameter_weights) -│ ├── diff_mean.pt - Means of one-step differences (neural_lam.create_parameter_weights) -│ ├── diff_std.pt - Std.-dev. of one-step differences (neural_lam.create_parameter_weights) -│ ├── flux_stats.pt - Mean and std.-dev. of solar flux forcing (neural_lam.create_parameter_weights) -│ └── parameter_weights.npy - Loss weights for different state parameters (neural_lam.create_parameter_weights) -├── dataset2 -├── ... -└── datasetN -``` - ## Format of graph directory The `graphs` directory contains generated graph structures that can be used by different graph-based models. The structure is shown with examples below: diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 18a8df26..7384396d 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -333,7 +333,17 @@ def boundary_mask(self) -> xr.DataArray: @property def coords_projection(self) -> ccrs.Projection: - """Return the projection of the coordinates. + """ + Return the projection of the coordinates. + + NOTE: currently this expects the projection information to be in the + `extra` section of the configuration file, with a `projection` key + containing a `class_name` and `kwargs` for constructing the + `cartopy.crs.Projection` object. 
This is a temporary solution until + the projection information can be parsed in the produced dataset + itself. `mllam-data-prep` ignores the contents of the `extra` section + of the config file which is why we need to check that the necessary + parts are there. Returns ------- @@ -341,26 +351,33 @@ def coords_projection(self) -> ccrs.Projection: The projection of the coordinates. """ - # XXX: this should move to config - kwargs = { - "LoVInDegrees": 25.0, - "LaDInDegrees": 56.7, - "Latin1InDegrees": 56.7, - "Latin2InDegrees": 56.7, - } - - lon_0 = kwargs["LoVInDegrees"] # Latitude of first standard parallel - lat_0 = kwargs["LaDInDegrees"] # Latitude of second standard parallel - lat_1 = kwargs["Latin1InDegrees"] # Origin latitude - lat_2 = kwargs["Latin2InDegrees"] # Origin longitude + if "projection" not in self._config.extra: + raise ValueError( + "projection information not found in the configuration file " + f"({self._config_path}). Please add the projection information " + "to the `extra` section of the config, by adding a " + "`projection` key with the class name and kwargs of the " + "projection." + ) - crs = ccrs.LambertConformal( - central_longitude=lon_0, - central_latitude=lat_0, - standard_parallels=(lat_1, lat_2), - ) + projection_info = self._config.extra["projection"] + if "class_name" not in projection_info: + raise ValueError( + "class_name not found in the projection information. Please " + "add the class name of the projection to the `projection` key " + "in the `extra` section of the config." + ) + if "kwargs" not in projection_info: + raise ValueError( + "kwargs not found in the projection information. Please add " + "the keyword arguments of the projection to the `projection` " + "key in the `extra` section of the config." 
+ ) - return crs + class_name = projection_info["class_name"] + ProjectionClass = getattr(ccrs, class_name) + kwargs = projection_info["kwargs"] + return ProjectionClass(**kwargs) @property def grid_shape_state(self): diff --git a/neural_lam/datastore/plot_example.py b/neural_lam/datastore/plot_example.py index 53bc6d5e..b68d33af 100644 --- a/neural_lam/datastore/plot_example.py +++ b/neural_lam/datastore/plot_example.py @@ -119,10 +119,8 @@ def _parse_dict(arg_str): nargs="+", default=[], type=_parse_dict, - help=( - "Selections to apply to the dataarray, for example " - '`time="1990-09-03T0:00" would select this single timestep', - ), + help="Selections to apply to the dataarray, for example " + "`time='1990-09-03T0:00'` would select this single timestep", ) args = parser.parse_args() diff --git a/pyproject.toml b/pyproject.toml index fc3fbf9e..15d59be2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "torch-geometric==2.3.1", "parse>=1.20.2", "dataclass-wizard>=0.22.3", - "mllam-data-prep[dask-distributed]>=0.3.0", + "mllam-data-prep @ git+https://github.com/leifdenby/mllam-data-prep/@feat/extra-section-in-config", ] requires-python = ">=3.9" diff --git a/tests/datastore_examples/mdp/danra.example.yaml b/tests/datastore_examples/mdp/danra.example.yaml index 73aa0dfa..0801f832 100644 --- a/tests/datastore_examples/mdp/danra.example.yaml +++ b/tests/datastore_examples/mdp/danra.example.yaml @@ -1,4 +1,4 @@ -schema_version: v0.2.0 +schema_version: v0.2.0+dev dataset_version: v0.1.0 output: @@ -49,7 +49,7 @@ inputs: state_feature: method: stack_variables_by_var_name dims: [altitude] - name_format: f"{var_name}{altitude}m" + name_format: "{var_name}{altitude}m" grid_index: method: stack dims: [x, y] @@ -70,7 +70,7 @@ inputs: dims: [x, y] forcing_feature: method: stack_variables_by_var_name - name_format: f"{var_name}" + name_format: "{var_name}" target_output_variable: forcing danra_lsm: @@ -84,5 +84,13 @@ inputs: dims: [x, y] 
static_feature: method: stack_variables_by_var_name - name_format: f"{var_name}" + name_format: "{var_name}" target_output_variable: static + +extra: + projection: + class_name: LambertConformal + kwargs: + central_longitude: 25.0 + central_latitude: 56.7 + standard_parallels: [56.7, 56.7]