From eb33c54eadad171843d9e1c1fa84ec4e0f442fed Mon Sep 17 00:00:00 2001
From: mathleur
Date: Wed, 15 Jan 2025 14:34:00 +0100
Subject: [PATCH 1/2] faster merge operation without for loops

---
 .../datacube_merger/datacube_merger.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py b/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py
index 5b0516638..85f1fef75 100644
--- a/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py
+++ b/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py
@@ -24,19 +24,16 @@ def _mapped_axes(self):
         return self._first_axis
 
     def merged_values(self, datacube):
-        first_ax_vals = datacube.ax_vals(self.name)
+        first_ax_vals = np.array(datacube.ax_vals(self.name))
         second_ax_name = self._second_axis
-        second_ax_vals = datacube.ax_vals(second_ax_name)
+        second_ax_vals = np.array(datacube.ax_vals(second_ax_name))
         linkers = self._linkers
-        merged_values = []
-        for i in range(len(first_ax_vals)):
-            first_val = first_ax_vals[i]
-            for j in range(len(second_ax_vals)):
-                second_val = second_ax_vals[j]
-                val_to_add = pd.to_datetime("".join([first_val, linkers[0], second_val, linkers[1]]))
-                val_to_add = val_to_add.to_numpy()
-                val_to_add = val_to_add.astype("datetime64[s]")
-                merged_values.append(val_to_add)
+        first_grid, second_grid = np.meshgrid(first_ax_vals, second_ax_vals, indexing="ij")
+        combined_strings = np.char.add(
+            np.char.add(first_grid.ravel(), linkers[0]),
+            np.char.add(second_grid.ravel(), linkers[1])
+        )
+        merged_values = pd.to_datetime(combined_strings).to_numpy().astype("datetime64[s]")
         merged_values = np.array(merged_values)
         logging.info(
             f"Merged values {first_ax_vals} on axis {self.name} and \
@@ -59,7 +56,7 @@ def unmerge(self, merged_val):
         first_val = merged_val[:first_idx]
         first_linker_size = len(self._linkers[0])
         second_linked_size = len(self._linkers[1])
-        second_val = merged_val[first_idx + first_linker_size : -second_linked_size]
+        second_val = merged_val[first_idx + first_linker_size: -second_linked_size]
         # TODO: maybe replacing like this is too specific to time/dates?
         first_val = str(first_val).replace("-", "")

From a92d15d9b7af127b91c8b9f5603102203d8698b0 Mon Sep 17 00:00:00 2001
From: mathleur
Date: Wed, 15 Jan 2025 14:45:42 +0100
Subject: [PATCH 2/2] black

---
 .../datacube_mappers/mapper_types/healpix_nested.py    | 8 +++++---
 .../transformations/datacube_merger/datacube_merger.py | 5 ++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py
index dca87a7a3..334805d42 100644
--- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py
+++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py
@@ -222,6 +222,8 @@ def int_sqrt(self, i):
 
 
 # md5 grid hash in form {resolution : hash}
-_md5_hash = {1024: "cbda19e48d4d7e5e22641154878b9b22",
-             512: "9533855ee8e38314e19aaa0434c310da",
-             128: "f3dfeb7a5bbbdd13a20d10fdb3797c71"}
+_md5_hash = {
+    1024: "cbda19e48d4d7e5e22641154878b9b22",
+    512: "9533855ee8e38314e19aaa0434c310da",
+    128: "f3dfeb7a5bbbdd13a20d10fdb3797c71",
+}
diff --git a/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py b/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py
index 85f1fef75..51acd9984 100644
--- a/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py
+++ b/polytope_feature/datacube/transformations/datacube_merger/datacube_merger.py
@@ -30,8 +30,7 @@ def merged_values(self, datacube):
         linkers = self._linkers
         first_grid, second_grid = np.meshgrid(first_ax_vals, second_ax_vals, indexing="ij")
         combined_strings = np.char.add(
-            np.char.add(first_grid.ravel(), linkers[0]),
-            np.char.add(second_grid.ravel(), linkers[1])
+            np.char.add(first_grid.ravel(), linkers[0]), np.char.add(second_grid.ravel(), linkers[1])
         )
         merged_values = pd.to_datetime(combined_strings).to_numpy().astype("datetime64[s]")
         merged_values = np.array(merged_values)
@@ -56,7 +55,7 @@ def unmerge(self, merged_val):
         first_val = merged_val[:first_idx]
         first_linker_size = len(self._linkers[0])
         second_linked_size = len(self._linkers[1])
-        second_val = merged_val[first_idx + first_linker_size: -second_linked_size]
+        second_val = merged_val[first_idx + first_linker_size : -second_linked_size]
         # TODO: maybe replacing like this is too specific to time/dates?
         first_val = str(first_val).replace("-", "")
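
Note: the sketch below is a minimal, standalone illustration of the vectorised merge that PATCH 1/2 introduces, not part of either patch. The axis values, linkers, and variable names are hypothetical stand-ins for datacube.ax_vals(...) and self._linkers.

    import numpy as np
    import pandas as pd

    # Hypothetical stand-ins for the two axes being merged and their linkers.
    date_vals = np.array(["2025-01-15", "2025-01-16"])
    time_vals = np.array(["06:00", "12:00"])
    linkers = [" ", ""]  # merged string is "<date> <time>"

    # Cartesian product of the two axes without Python-level loops.
    date_grid, time_grid = np.meshgrid(date_vals, time_vals, indexing="ij")

    # Element-wise concatenation: date + linkers[0] + time + linkers[1].
    combined = np.char.add(
        np.char.add(date_grid.ravel(), linkers[0]),
        np.char.add(time_grid.ravel(), linkers[1]),
    )

    # One vectorised parse instead of one pd.to_datetime call per pair,
    # truncated to second precision as in the patched merged_values.
    merged = pd.to_datetime(combined).to_numpy().astype("datetime64[s]")
    print(merged)
    # values: 2025-01-15T06:00:00, 2025-01-15T12:00:00,
    #         2025-01-16T06:00:00, 2025-01-16T12:00:00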