From f7dee12a88eb982020e3c720482c233cc52e5d18 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Thu, 12 Sep 2024 02:04:28 +0100 Subject: [PATCH 01/13] fix: add `console.log` and remove unused variables --- python/clim_recal/cli.py | 8 ++++---- python/clim_recal/resample.py | 30 +++++++++++------------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/python/clim_recal/cli.py b/python/clim_recal/cli.py index 99bb551e..9d73b0a6 100644 --- a/python/clim_recal/cli.py +++ b/python/clim_recal/cli.py @@ -65,10 +65,10 @@ def pipeline( all_runs: Annotated[bool, typer.Option("--all-runs")] = False, default_runs: Annotated[bool, typer.Option("--default-runs")] = False, all_methods: Annotated[bool, typer.Option("--all-methods")] = False, - cpm_projection: Annotated[bool, typer.Option("--project-cpm")] = True, - hads_projection: Annotated[bool, typer.Option("--project-hads")] = True, - crop_hads: Annotated[bool, typer.Option("--crop-cpm")] = True, - crop_cpm: Annotated[bool, typer.Option("--crop-hads")] = True, + cpm_projection: Annotated[bool, typer.Option(" /--skip-cpm")] = True, + hads_projection: Annotated[bool, typer.Option(" /--skip-hads")] = True, + crop_hads: Annotated[bool, typer.Option(" /--skip-crop-cpm")] = True, + crop_cpm: Annotated[bool, typer.Option(" /--skip-crop-hads")] = True, execute: Annotated[bool, typer.Option("--execute")] = False, start_index: Annotated[int, typer.Option("--start-index", "-s", min=0)] = 0, total: Annotated[int, typer.Option("--total-from-index", "-t", min=0)] = 0, diff --git a/python/clim_recal/resample.py b/python/clim_recal/resample.py index 40867c43..0b9f6926 100644 --- a/python/clim_recal/resample.py +++ b/python/clim_recal/resample.py @@ -13,10 +13,8 @@ from typing import Any, Callable, Final, Iterable, Iterator, Literal, Sequence import dill as pickle -import numpy as np import rioxarray # nopycln: import from osgeo.gdal import Dataset as GDALDataset -from osgeo.gdal import GRA_NearestNeighbour from rich 
import print from tqdm.rich import trange from xarray import Dataset @@ -59,33 +57,17 @@ ChangeDayType = set[tuple[int, int]] CLIMATE_DATA_MOUNT_PATH: Path = climate_data_mount_path() -DEFAULT_INTERPOLATION_METHOD: str = "linear" -"""Default method to infer missing estimates in a time series.""" CFCalendarSTANDARD: Final[str] = "standard" RESAMPLING_OUTPUT_PATH: Final[PathLike] = ( CLIMATE_DATA_MOUNT_PATH / "CPM-365/andys-two-gdal-step-approach/resample" ) -CROP_OUTPUT_PATH: Final[PathLike] = ( - CLIMATE_DATA_MOUNT_PATH / "CPM-365/andys-two-gdal-step-approach/crop" -) RAW_HADS_PATH: Final[PathLike] = CLIMATE_DATA_MOUNT_PATH / "Raw/HadsUKgrid" RAW_CPM_PATH: Final[PathLike] = CLIMATE_DATA_MOUNT_PATH / "Raw/UKCP2.2" RAW_HADS_TASMAX_PATH: Final[PathLike] = RAW_HADS_PATH / "tasmax/day" RAW_CPM_TASMAX_PATH: Final[PathLike] = RAW_CPM_PATH / "tasmax/01/latest" -# TODO: remove REPROJECTED_CPM_TASMAX_05_LATEST_INPUT_PATH -REPROJECTED_CPM_TASMAX_05_LATEST_INPUT_PATH: Final[PathLike] = Path( - CLIMATE_DATA_MOUNT_PATH / "Reprojected_infill/UKCP2.2/tasmax/05/latest" -) - -CPRUK_RESAMPLING_METHOD: Final[str] = GRA_NearestNeighbour -ResamplingArgs = tuple[PathLike, np.ndarray, np.ndarray, PathLike] -ResamplingCallable = Callable[[list | tuple], int] -CPM_STANDARD_CALENDAR_PATH: Final[Path] = Path("cpm-standard-calendar") -CPM_SPATIAL_COORDS_PATH: Final[Path] = Path("cpm-to-27700-spatial") - CPM_OUTPUT_LOCAL_PATH: Final[Path] = Path("cpm") HADS_OUTPUT_LOCAL_PATH: Final[Path] = Path("hads") CPM_CROP_OUTPUT_LOCAL_PATH: Final[Path] = Path("cpm-crop") @@ -283,6 +265,7 @@ def range_crop_projection( step: int = 1, override_export_path: Path | None = None, return_results: bool = False, + delete_xarray_after_save: bool = True, **kwargs, ) -> list[Path]: start = start or self.start_index @@ -312,6 +295,9 @@ def crop_projection( **kwargs, ) -> Path | T_Dataset: """Crop a projection to `region` geometry.""" + console.debug( + f"Preparing to crop `_reprojected_paths` index {index} from {self}" 
+ ) try: assert hasattr(self, "_reprojected_paths") except AssertionError: @@ -333,6 +319,7 @@ def crop_projection( path.mkdir(exist_ok=True, parents=True) resampled_xr: Dataset = self._reprojected_paths[index] + console.debug(f"From {self} crop {xr_time_series}") cropped: Dataset = crop_xarray( xr_time_series=resampled_xr, crop_box=RegionOptions.bounding_box(self.crop_region), @@ -445,7 +432,9 @@ def to_reprojection( index=index, source_to_index=source_to_index ) path: PathLike = self.output_path + console.debug(f"Setting 'cpm_for_coord_alignment' for {self}") self.set_cpm_for_coord_alignment() + console.debug(f"Set 'cpm_for_coord_alignment' for {self}") return apply_geo_func( source_path=source_path, func=self._resample_func, @@ -497,7 +486,7 @@ class CPMResampler(ResamblerBase): >>> resample_test_cpm_output_path: Path = getfixture( ... 'resample_test_cpm_output_path') >>> cpm_resampler: CPMResampler = CPMResampler( - ... input_path=REPROJECTED_CPM_TASMAX_05_LATEST_INPUT_PATH, + ... input_path=RAW_CPM_TASMAX_PATH, ... output_path=resample_test_cpm_output_path, ... input_file_extension=TIF_EXTENSION_STR, ... 
) @@ -518,6 +507,7 @@ class CPMResampler(ResamblerBase): crop_path: PathLike = RESAMPLING_OUTPUT_PATH / CPM_CROP_OUTPUT_LOCAL_PATH input_file_x_column_name: str = CPM_RAW_X_COLUMN_NAME input_file_y_column_name: str = CPM_RAW_Y_COLUMN_NAME + prior_time_series: PathLike | Dataset | None = None _resample_func: ReprojectFuncType = cpm_reproject_with_standard_calendar @property @@ -535,6 +525,7 @@ def to_reprojection( index=index, source_to_index=source_to_index ) path: PathLike = self.output_path + console.debug(f"Reprojecting index CPM {index}...") result: Path | T_Dataset | GDALDataset = apply_geo_func( source_path=source_path, func=self._resample_func, @@ -544,6 +535,7 @@ def to_reprojection( variable_name=self.cpm_variable_name, return_results=return_results, ) + console.debug(f"Completed index CPM {index}...") if isinstance(result, PathLike): if not hasattr(self, "_reprojected_paths"): self._reprojected_paths: list[Path] = [] From c7e61e672a9de479b393ff933da4ccdd50b90f3f Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Thu, 12 Sep 2024 11:40:14 +0100 Subject: [PATCH 02/13] feat: add `datadrive` to `docker` `linux` `mount` config --- compose/ua-linux-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/compose/ua-linux-compose.yml b/compose/ua-linux-compose.yml index eff20e56..1e1f58a6 100644 --- a/compose/ua-linux-compose.yml +++ b/compose/ua-linux-compose.yml @@ -7,6 +7,7 @@ services: - NB_GID=1001 volumes: - /mnt/vmfileshare:/mnt/vmfileshare + - /datadrive:/datadrive rstudio: volumes: From a6c5abcea88265a6db3a4831b0a74efe5a6d3f47 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Thu, 12 Sep 2024 11:44:40 +0100 Subject: [PATCH 03/13] feat: add `bash/andy-cli.bash` --- bash/andy-cli.bash | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 bash/andy-cli.bash diff --git a/bash/andy-cli.bash b/bash/andy-cli.bash new file mode 100644 index 00000000..111e7c14 --- /dev/null +++ b/bash/andy-cli.bash @@ -0,0 +1,25 @@ 
+#!/bin/bash +set -e +set -x + +# Start-index goes from 1 +max_index=500 + +# Input and output paths +hads_input_path="/datadrive/HadsUKgrid/" +cpm_input_path="/datadrive/UKCP2.2/" +output_path="/datadrive/clim-recal-results/group_run_2024_09_11_1500" + +for i in $(seq 1 $max_index); do + echo "Running for index={$i}" + { + clim-recal \ + --start-index $i \ + --total-from-index 1 \ + --hads-input-path $hads_input_path \ + --cpm-input-path $cpm_input_path \ + --output-path $output_path \ + --execute + } 2>&1 | tee /datadrive/clim-recal-results/group_run_2024_09_12_1000/logs/log_$i.txt + +done From ab96f1166958fc6fd2b3d7c51521b50b5729fe86 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Thu, 12 Sep 2024 12:03:15 +0100 Subject: [PATCH 04/13] minor tweaks to path variables --- bash/andy-cli.bash | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bash/andy-cli.bash b/bash/andy-cli.bash index 111e7c14..eb9bf1d6 100644 --- a/bash/andy-cli.bash +++ b/bash/andy-cli.bash @@ -8,7 +8,10 @@ max_index=500 # Input and output paths hads_input_path="/datadrive/HadsUKgrid/" cpm_input_path="/datadrive/UKCP2.2/" -output_path="/datadrive/clim-recal-results/group_run_2024_09_11_1500" +output_path="/datadrive/clim-recal-results/group_run_`date +%F-%H-%M`" +log_path="$output_path/logs" + +mkdir -p $log_path for i in $(seq 1 $max_index); do echo "Running for index={$i}" @@ -20,6 +23,6 @@ for i in $(seq 1 $max_index); do --cpm-input-path $cpm_input_path \ --output-path $output_path \ --execute - } 2>&1 | tee /datadrive/clim-recal-results/group_run_2024_09_12_1000/logs/log_$i.txt + } 2>&1 | tee $log_path/log_$i.txt done From 0693f37ef2087507677355028bceaec7d0d13ac0 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Thu, 12 Sep 2024 12:06:20 +0100 Subject: [PATCH 05/13] fix: change `bash/andy-cli.bash` first index from 1 to 0 --- bash/andy-cli.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bash/andy-cli.bash b/bash/andy-cli.bash index
eb9bf1d6..65999a1e 100644 --- a/bash/andy-cli.bash +++ b/bash/andy-cli.bash @@ -13,7 +13,7 @@ log_path="$output_path/logs" mkdir -p $log_path -for i in $(seq 1 $max_index); do +for i in $(seq 0 $max_index); do echo "Running for index={$i}" { clim-recal \ From 6643330f0d31c3d211de9d3f7a33b7487110a00f Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Fri, 13 Sep 2024 10:20:59 +0100 Subject: [PATCH 06/13] rename script --- bash/{andy-cli.bash => run-pipeline-iteratively.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename bash/{andy-cli.bash => run-pipeline-iteratively.sh} (100%) diff --git a/bash/andy-cli.bash b/bash/run-pipeline-iteratively.sh similarity index 100% rename from bash/andy-cli.bash rename to bash/run-pipeline-iteratively.sh From 2494adb8c32512dfd5cbc63a474bef8c53ad3bf5 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Fri, 13 Sep 2024 13:45:59 +0100 Subject: [PATCH 07/13] fix: `ResampleManagerBase` `resample_folder` method --- python/clim_recal/resample.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/clim_recal/resample.py b/python/clim_recal/resample.py index 0b9f6926..12c76d08 100644 --- a/python/clim_recal/resample.py +++ b/python/clim_recal/resample.py @@ -265,7 +265,9 @@ def range_crop_projection( step: int = 1, override_export_path: Path | None = None, return_results: bool = False, - delete_xarray_after_save: bool = True, + # possible meanse of reducing memory issues by removing + # xarray instance while keeping paths for logging purposes + # delete_xarray_after_save: bool = True, **kwargs, ) -> list[Path]: start = start or self.start_index @@ -424,7 +426,7 @@ def set_cpm_for_coord_alignment(self) -> None: def to_reprojection( self, index: int = 0, - # override_export_path: Path | None = None, + override_export_path: Path | None = None, return_results: bool = False, source_to_index: Sequence | None = None, ) -> Path | T_Dataset: @@ -614,8 +616,8 @@ def input_folder(self) -> Path | None:
@property def resample_folder(self) -> Path | None: """Return `self._output_path` set by `set_resample_paths()`.""" - if hasattr(self, "_input_path"): - return Path(self._input_path) + if hasattr(self, "_output_path"): + return Path(self._output_path) else: return None From 0d5484dfaa8b69003a61351c46be31abdbf9a89b Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Fri, 13 Sep 2024 13:50:12 +0100 Subject: [PATCH 08/13] fix: resample `crop` path eg. `hads/tasmax/glasgow` -> `hads/glasgow/tasmax` --- python/clim_recal/resample.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/clim_recal/resample.py b/python/clim_recal/resample.py index 12c76d08..b1c7c28b 100644 --- a/python/clim_recal/resample.py +++ b/python/clim_recal/resample.py @@ -945,7 +945,7 @@ def _gen_crop_folder_paths( ) for var in self.variables: for region in self.crop_regions: - crop_path = Path(path) / "hads" / var / region + crop_path = Path(path) / "hads" / region / var if append_cropped_path_dict: self._cropped_path_dict[crop_path] = region yield crop_path @@ -1117,12 +1117,12 @@ def _gen_crop_folder_paths( crop_path: Path = ( Path(path) / "cpm" - / VariableOptions.cpm_value(var) / region + / VariableOptions.cpm_value(var) / run_type ) else: - crop_path: Path = Path(path) / "cpm" / var / region / run_type + crop_path: Path = Path(path) / "cpm" / region / var / run_type if append_cropped_path_dict: self._cropped_path_dict[crop_path] = region yield crop_path From 710630a70bd03337df756a9c08b90588d75bf820 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Fri, 13 Sep 2024 13:59:15 +0100 Subject: [PATCH 09/13] fix: resample `override` `CPMResample.to_reprojection` --- python/clim_recal/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/clim_recal/resample.py b/python/clim_recal/resample.py index b1c7c28b..0201bb8d 100644 --- a/python/clim_recal/resample.py +++ b/python/clim_recal/resample.py @@ -519,7 +519,7 @@ def 
cpm_variable_name(self) -> str: def to_reprojection( self, index: int = 0, - # override_export_path: Path | None = None, + override_export_path: Path | None = None, return_results: bool = False, source_to_index: Sequence | None = None, ) -> Path | T_Dataset: From 0d653417c2fec51491d1b770a1fbd2fa54007a43 Mon Sep 17 00:00:00 2001 From: Sam Greenbury <50113363+sgreenbury@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:15:49 +0100 Subject: [PATCH 10/13] Add explicit CLI options --- python/clim_recal/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/clim_recal/cli.py b/python/clim_recal/cli.py index 9d73b0a6..67e6d7ed 100644 --- a/python/clim_recal/cli.py +++ b/python/clim_recal/cli.py @@ -65,10 +65,10 @@ def pipeline( all_runs: Annotated[bool, typer.Option("--all-runs")] = False, default_runs: Annotated[bool, typer.Option("--default-runs")] = False, all_methods: Annotated[bool, typer.Option("--all-methods")] = False, - cpm_projection: Annotated[bool, typer.Option(" /--skip-cpm")] = True, - hads_projection: Annotated[bool, typer.Option(" /--skip-hads")] = True, - crop_hads: Annotated[bool, typer.Option(" /--skip-crop-cpm")] = True, - crop_cpm: Annotated[bool, typer.Option(" /--skip-crop-hads")] = True, + cpm_projection: Annotated[bool, typer.Option("--project-cpm/--skip-project-cpm")] = True, + hads_projection: Annotated[bool, typer.Option("--project-hads/--skip-project-hads")] = True, + crop_hads: Annotated[bool, typer.Option("--crop-hads/--skip-crop-hads")] = True, + crop_cpm: Annotated[bool, typer.Option("--crop-cpm/--skip-crop-cpm")] = True, execute: Annotated[bool, typer.Option("--execute")] = False, start_index: Annotated[int, typer.Option("--start-index", "-s", min=0)] = 0, total: Annotated[int, typer.Option("--total-from-index", "-t", min=0)] = 0, From 0449323c289224ccdc6d775b15c44393eb09655a Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 13 Sep 2024 15:22:25 +0100 Subject: [PATCH 11/13] Fix pre-commit hooks --- 
python/clim_recal/cli.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/clim_recal/cli.py b/python/clim_recal/cli.py index 67e6d7ed..f0da17c0 100644 --- a/python/clim_recal/cli.py +++ b/python/clim_recal/cli.py @@ -65,8 +65,12 @@ def pipeline( all_runs: Annotated[bool, typer.Option("--all-runs")] = False, default_runs: Annotated[bool, typer.Option("--default-runs")] = False, all_methods: Annotated[bool, typer.Option("--all-methods")] = False, - cpm_projection: Annotated[bool, typer.Option("--project-cpm/--skip-project-cpm")] = True, - hads_projection: Annotated[bool, typer.Option("--project-hads/--skip-project-hads")] = True, + cpm_projection: Annotated[ + bool, typer.Option("--project-cpm/--skip-project-cpm") + ] = True, + hads_projection: Annotated[ + bool, typer.Option("--project-hads/--skip-project-hads") + ] = True, crop_hads: Annotated[bool, typer.Option("--crop-hads/--skip-crop-hads")] = True, crop_cpm: Annotated[bool, typer.Option("--crop-cpm/--skip-crop-cpm")] = True, execute: Annotated[bool, typer.Option("--execute")] = False, From f84eb04aca36eda585a9f5e5b682429546d0c144 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Fri, 13 Sep 2024 16:09:13 +0100 Subject: [PATCH 12/13] fix: `console.debug` -> `console.log` --- python/clim_recal/resample.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/clim_recal/resample.py b/python/clim_recal/resample.py index 0201bb8d..1dfb0764 100644 --- a/python/clim_recal/resample.py +++ b/python/clim_recal/resample.py @@ -297,9 +297,7 @@ def crop_projection( **kwargs, ) -> Path | T_Dataset: """Crop a projection to `region` geometry.""" - console.debug( - f"Preparing to crop `_reprojected_paths` index {index} from {self}" - ) + console.log(f"Preparing to crop `_reprojected_paths` index {index} from {self}") try: assert hasattr(self, "_reprojected_paths") except AssertionError: @@ -321,7 +319,7 @@ def crop_projection( path.mkdir(exist_ok=True, 
parents=True) resampled_xr: Dataset = self._reprojected_paths[index] - console.debug(f"From {self} crop {xr_time_series}") + console.log(f"From {self} crop {xr_time_series}") cropped: Dataset = crop_xarray( xr_time_series=resampled_xr, crop_box=RegionOptions.bounding_box(self.crop_region), @@ -434,9 +432,9 @@ def to_reprojection( index=index, source_to_index=source_to_index ) path: PathLike = self.output_path - console.debug(f"Setting 'cpm_for_coord_alignment' for {self}") + console.log(f"Setting 'cpm_for_coord_alignment' for {self}") self.set_cpm_for_coord_alignment() - console.debug(f"Set 'cpm_for_coord_alignment' for {self}") + console.log(f"Set 'cpm_for_coord_alignment' for {self}") return apply_geo_func( source_path=source_path, func=self._resample_func, @@ -527,7 +525,7 @@ def to_reprojection( index=index, source_to_index=source_to_index ) path: PathLike = self.output_path - console.debug(f"Reprojecting index CPM {index}...") + console.log(f"Reprojecting index CPM {index}...") result: Path | T_Dataset | GDALDataset = apply_geo_func( source_path=source_path, func=self._resample_func, @@ -537,7 +535,7 @@ def to_reprojection( variable_name=self.cpm_variable_name, return_results=return_results, ) - console.debug(f"Completed index CPM {index}...") + console.log(f"Completed index CPM {index}...") if isinstance(result, PathLike): if not hasattr(self, "_reprojected_paths"): self._reprojected_paths: list[Path] = [] From fc1cfe9a1eddf531da19ebe30baa772b342112c4 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Fri, 13 Sep 2024 16:27:53 +0100 Subject: [PATCH 13/13] fix: `run-pipeline-iteratively.sh` executable and `log` typo in `ResamplerBase.crop` --- bash/run-pipeline-iteratively.sh | 0 python/clim_recal/resample.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 bash/run-pipeline-iteratively.sh diff --git a/bash/run-pipeline-iteratively.sh b/bash/run-pipeline-iteratively.sh old mode 100644 new mode 100755 diff --git 
a/python/clim_recal/resample.py b/python/clim_recal/resample.py index 1dfb0764..6dd15105 100644 --- a/python/clim_recal/resample.py +++ b/python/clim_recal/resample.py @@ -319,7 +319,7 @@ def crop_projection( path.mkdir(exist_ok=True, parents=True) resampled_xr: Dataset = self._reprojected_paths[index] - console.log(f"From {self} crop {xr_time_series}") + console.log(f"From {self} crop {resampled_xr}") cropped: Dataset = crop_xarray( xr_time_series=resampled_xr, crop_box=RegionOptions.bounding_box(self.crop_region),