From 5503e125f66d85491d54020b8a7a7102d97e995e Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Tue, 12 Dec 2023 12:48:16 +0100 Subject: [PATCH 1/3] Load constant pointing in TableLoader --- ctapipe/io/astropy_helpers.py | 20 +++- ctapipe/io/tableloader.py | 132 ++++++++++++++++---------- ctapipe/io/tests/test_table_loader.py | 25 ++--- 3 files changed, 111 insertions(+), 66 deletions(-) diff --git a/ctapipe/io/astropy_helpers.py b/ctapipe/io/astropy_helpers.py index dcf5a590bc4..ce20d08f50f 100644 --- a/ctapipe/io/astropy_helpers.py +++ b/ctapipe/io/astropy_helpers.py @@ -4,6 +4,7 @@ """ import os from contextlib import ExitStack +from uuid import uuid4 import numpy as np import tables @@ -227,7 +228,7 @@ def _parse_hdf5_attrs(table): return transforms, descriptions, meta -def join_allow_empty(left, right, keys, join_type="left", **kwargs): +def join_allow_empty(left, right, keys, join_type="left", keep_order=False, **kwargs): """ Join two astropy tables, allowing both sides to be empty tables. @@ -262,4 +263,19 @@ def join_allow_empty(left, right, keys, join_type="left", **kwargs): if right_empty: return left.copy() - return join(left, right, keys, join_type=join_type, **kwargs) + sort_key = None + if keep_order: + sort_key = str(uuid4()) + if join_type == "left": + left[sort_key] = np.arange(len(left)) + elif join_type == "right": + right[sort_key] = np.arange(len(left)) + else: + raise ValueError("keep_order is only supported for left and right joins") + + joined = join(left, right, keys, join_type=join_type, **kwargs) + if sort_key is not None: + joined.sort(sort_key) + del joined[sort_key] + + return joined diff --git a/ctapipe/io/tableloader.py b/ctapipe/io/tableloader.py index 267c2d18e08..a7c872d7578 100644 --- a/ctapipe/io/tableloader.py +++ b/ctapipe/io/tableloader.py @@ -31,6 +31,7 @@ SIMULATION_CONFIG_TABLE = "/configuration/simulation/run" SHOWER_DISTRIBUTION_TABLE = "/simulation/service/shower_distribution" OBSERVATION_TABLE = "/configuration/observation/observation_block" +FIXED_POINTING_GROUP = "configuration/telescope/pointing" DL2_SUBARRAY_GROUP = "/dl2/event/subarray" DL2_TELESCOPE_GROUP = "/dl2/event/telescope" @@ -201,6 +202,12 @@ class TableLoader(Component): observation_info = traits.Bool( False, help="join observation information to each event" ).tag(config=True) + + pointing = traits.Bool( + True, + help="Load pointing information and join to events", + ).tag(config=True) + focal_length_choice = traits.UseEnum( FocalLengthKind, default_value=FocalLengthKind.EFFECTIVE, @@ -357,22 +364,14 @@ def _join_observation_info(self, table): # casts the obs_id in the joint result to float. obs_table["obs_id"] = obs_table["obs_id"].astype(table["obs_id"].dtype) - # to be able to sort to original table order - index_added = self._add_index_if_needed(table) - - joint = join_allow_empty( + return join_allow_empty( table, obs_table, keys="obs_id", join_type="left", + keep_order=True, ) - # sort back to original order and remove index col - self._sort_to_original_order(joint, keep_index=not index_added) - if index_added: - table.remove_column("__index__") - return joint - def read_subarray_events( self, start=None, @@ -403,13 +402,14 @@ def read_subarray_events( table: astropy.io.Table Table with primary index columns "obs_id" and "event_id". """ - updated_attributes = self._check_args( - dl2=dl2, simulated=simulated, observation_info=observation_info + updated_args = self._check_args( + dl2=dl2, + simulated=simulated, + observation_info=observation_info, ) - - dl2 = updated_attributes["dl2"] - simulated = updated_attributes["simulated"] - observation_info = updated_attributes["observation_info"] + dl2 = updated_args["dl2"] + simulated = updated_args["simulated"] + observation_info = updated_args["observation_info"] table = read_table(self.h5file, TRIGGER_TABLE, start=start, stop=stop) if keep_order: @@ -469,6 +469,7 @@ def _read_telescope_events_for_id( true_images, true_parameters, instrument, + pointing, start=None, stop=None, ): @@ -500,6 +501,8 @@ def _read_telescope_events_for_id( load image parameters obtained from true images instrument: bool join subarray instrument information to each event + pointing: bool + join pointing information to each event Returns ------- @@ -593,6 +596,20 @@ def _read_telescope_events_for_id( ) table = _join_telescope_events(table, impacts) + if ( + len(table) > 0 + and pointing + and FIXED_POINTING_GROUP in self.h5file.root + ): + pointing = read_table( + self.h5file, f"{FIXED_POINTING_GROUP}/tel_{tel_id:03d}" + ) + # we know that we only have the tel_id we are looking for, remove optimize joining + del pointing["tel_id"] + table = join_allow_empty( + table, pointing, ["obs_id"], "left", keep_order=True + ) + return table def _read_telescope_events_for_ids( @@ -606,6 +623,7 @@ def _read_telescope_events_for_ids( true_images, true_parameters, instrument, + pointing, start=None, stop=None, ): @@ -620,6 +638,7 @@ def _read_telescope_events_for_ids( true_images=true_images, true_parameters=true_parameters, instrument=instrument, + pointing=pointing, start=start, stop=stop, ) @@ -687,6 +706,7 @@ def read_telescope_events( true_parameters=None, instrument=None, observation_info=None, + pointing=None, ): """ Read telescope-based event information. @@ -727,13 +747,15 @@ def read_telescope_events( join subarray instrument information to each event observation_info: bool join observation information to each event + pointing: bool + join pointing information to each event Returns ------- events: astropy.io.Table Table with primary index columns "obs_id", "event_id" and "tel_id". """ - updated_attributes = self._check_args( + updated_args = self._check_args( dl1_images=dl1_images, dl1_parameters=dl1_parameters, dl1_muons=dl1_muons, @@ -743,17 +765,18 @@ def read_telescope_events( true_parameters=true_parameters, instrument=instrument, observation_info=observation_info, + pointing=pointing, ) - - dl1_images = updated_attributes["dl1_images"] - dl1_parameters = updated_attributes["dl1_parameters"] - dl1_muons = updated_attributes["dl1_muons"] - dl2 = updated_attributes["dl2"] - simulated = updated_attributes["simulated"] - true_images = updated_attributes["true_images"] - true_parameters = updated_attributes["true_parameters"] - instrument = updated_attributes["instrument"] - observation_info = updated_attributes["observation_info"] + dl1_images = updated_args["dl1_images"] + dl1_parameters = updated_args["dl1_parameters"] + dl1_muons = updated_args["dl1_muons"] + dl2 = updated_args["dl2"] + simulated = updated_args["simulated"] + true_images = updated_args["true_images"] + true_parameters = updated_args["true_parameters"] + instrument = updated_args["instrument"] + observation_info = updated_args["observation_info"] + pointing = updated_args["pointing"] if telescopes is None: tel_ids = tuple(self.subarray.tel.keys()) @@ -770,6 +793,7 @@ def read_telescope_events( true_images=true_images, true_parameters=true_parameters, instrument=instrument, + pointing=pointing, start=start, stop=stop, ) @@ -853,6 +877,7 @@ def read_telescope_events_by_type( true_parameters=None, instrument=None, observation_info=None, + pointing=None, ) -> Dict[str, Table]: """Read subarray-based event information. @@ -878,6 +903,8 @@ def read_telescope_events_by_type( join subarray instrument information to each event observation_info: bool join observation information to each event + pointing: bool + join pointing information to each event Returns ------- @@ -885,7 +912,7 @@ def read_telescope_events_by_type( Dictionary of tables organized by telescope types Table with primary index columns "obs_id", "event_id" and "tel_id". """ - updated_attributes = self._check_args( + updated_args = self._check_args( dl1_images=dl1_images, dl1_parameters=dl1_parameters, dl1_muons=dl1_muons, @@ -895,17 +922,18 @@ def read_telescope_events_by_type( true_parameters=true_parameters, instrument=instrument, observation_info=observation_info, + pointing=pointing, ) - - dl1_images = updated_attributes["dl1_images"] - dl1_parameters = updated_attributes["dl1_parameters"] - dl1_muons = updated_attributes["dl1_muons"] - dl2 = updated_attributes["dl2"] - simulated = updated_attributes["simulated"] - true_images = updated_attributes["true_images"] - true_parameters = updated_attributes["true_parameters"] - instrument = updated_attributes["instrument"] - observation_info = updated_attributes["observation_info"] + dl1_images = updated_args["dl1_images"] + dl1_parameters = updated_args["dl1_parameters"] + dl1_muons = updated_args["dl1_muons"] + dl2 = updated_args["dl2"] + simulated = updated_args["simulated"] + true_images = updated_args["true_images"] + true_parameters = updated_args["true_parameters"] + instrument = updated_args["instrument"] + observation_info = updated_args["observation_info"] + pointing = updated_args["pointing"] if telescopes is None: tel_ids = tuple(self.subarray.tel.keys()) @@ -937,6 +965,7 @@ def read_telescope_events_by_type( true_images=true_images, true_parameters=true_parameters, instrument=instrument, + pointing=pointing, ) if len(table) > 0: @@ -990,6 +1019,7 @@ def read_telescope_events_by_id( true_parameters=None, instrument=None, observation_info=None, + pointing=None, ) -> Dict[int, Table]: """Read subarray-based event information. @@ -1015,6 +1045,8 @@ def read_telescope_events_by_id( join subarray instrument information to each event observation_info: bool join observation information to each event + pointing: bool + join pointing information to each event Returns ------- @@ -1022,7 +1054,7 @@ def read_telescope_events_by_id( Dictionary of tables organized by telescope ids Table with primary index columns "obs_id", "event_id" and "tel_id". """ - updated_attributes = self._check_args( + updated_args = self._check_args( dl1_images=dl1_images, dl1_parameters=dl1_parameters, dl1_muons=dl1_muons, @@ -1032,17 +1064,18 @@ def read_telescope_events_by_id( true_parameters=true_parameters, instrument=instrument, observation_info=observation_info, + pointing=pointing, ) - - dl1_images = updated_attributes["dl1_images"] - dl1_parameters = updated_attributes["dl1_parameters"] - dl1_muons = updated_attributes["dl1_muons"] - dl2 = updated_attributes["dl2"] - simulated = updated_attributes["simulated"] - true_images = updated_attributes["true_images"] - true_parameters = updated_attributes["true_parameters"] - instrument = updated_attributes["instrument"] - observation_info = updated_attributes["observation_info"] + dl1_images = updated_args["dl1_images"] + dl1_parameters = updated_args["dl1_parameters"] + dl1_muons = updated_args["dl1_muons"] + dl2 = updated_args["dl2"] + simulated = updated_args["simulated"] + true_images = updated_args["true_images"] + true_parameters = updated_args["true_parameters"] + instrument = updated_args["instrument"] + observation_info = updated_args["observation_info"] + pointing = updated_args["pointing"] if telescopes is None: tel_ids = tuple(self.subarray.tel.keys()) @@ -1074,6 +1107,7 @@ def read_telescope_events_by_id( true_images=true_images, true_parameters=true_parameters, instrument=instrument, + pointing=pointing, ) if len(table) > 0: by_id[tel_id] = table diff --git a/ctapipe/io/tests/test_table_loader.py b/ctapipe/io/tests/test_table_loader.py index 2262dd4de53..96e154b4567 100644 --- a/ctapipe/io/tests/test_table_loader.py +++ b/ctapipe/io/tests/test_table_loader.py @@ -272,21 +272,10 @@ def test_chunked(dl2_shower_geometry_file): stop = chunk_size with TableLoader(dl2_shower_geometry_file) as table_loader: - tel_event_it = table_loader.read_telescope_events_chunked( - chunk_size, - true_parameters=False, - ) - event_it = table_loader.read_subarray_events_chunked( - chunk_size, - ) - by_type_it = table_loader.read_telescope_events_by_type_chunked( - chunk_size, - true_parameters=False, - ) - by_id_it = table_loader.read_telescope_events_by_id_chunked( - chunk_size, - true_parameters=False, - ) + tel_event_it = table_loader.read_telescope_events_chunked(chunk_size=chunk_size, true_parameters=False) + event_it = table_loader.read_subarray_events_chunked(chunk_size=chunk_size) + by_type_it = table_loader.read_telescope_events_by_type_chunked(chunk_size=chunk_size, true_parameters=False) + by_id_it = table_loader.read_telescope_events_by_id_chunked(chunk_size=chunk_size, true_parameters=False) iters = (event_it, tel_event_it, by_type_it, by_id_it) @@ -312,6 +301,12 @@ def test_chunked(dl2_shower_geometry_file): # check events are in compatible order check_equal_array_event_order(events, tel_events) check_equal_array_event_order(trigger[start:stop], events) + np.testing.assert_allclose( + tel_events["telescope_pointing_altitude"].quantity.to_value(u.deg), 70 + ) + np.testing.assert_allclose( + tel_events["telescope_pointing_azimuth"].quantity.to_value(u.deg), 0 + ) # check number of telescope events is correct assert len(tel_events) == np.count_nonzero(events["tels_with_trigger"]) From abb48c7d307f851f82ef878744a3d101e772b357 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 4 Jan 2024 12:28:35 +0100 Subject: [PATCH 2/3] Add test for join_allow_empty --- ctapipe/io/astropy_helpers.py | 2 +- ctapipe/io/tests/test_astropy_helpers.py | 57 ++++++++++++++++++++++++ docs/changes/2481.feature.rst | 4 ++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 docs/changes/2481.feature.rst diff --git a/ctapipe/io/astropy_helpers.py b/ctapipe/io/astropy_helpers.py index ce20d08f50f..c0aa04fe746 100644 --- a/ctapipe/io/astropy_helpers.py +++ b/ctapipe/io/astropy_helpers.py @@ -24,7 +24,7 @@ TimeColumnTransform, ) -__all__ = ["read_table", "join_allow_empty"] +__all__ = ["read_table", "write_table", "join_allow_empty"] def read_table( diff --git a/ctapipe/io/tests/test_astropy_helpers.py b/ctapipe/io/tests/test_astropy_helpers.py index 52eb25e7e05..2bfd42b0372 100644 --- a/ctapipe/io/tests/test_astropy_helpers.py +++ b/ctapipe/io/tests/test_astropy_helpers.py @@ -206,3 +206,60 @@ def test_unknown_meta(tmp_path): writer.h5file.root.events._v_attrs["foo_UNIT"] = "TeV" read_table(filename, "/events", condition="energy > 0") + + +def test_join_allow_empty(): + from ctapipe.io.astropy_helpers import join_allow_empty + + table1 = Table( + { + "obs_id": [2, 2, 2, 1, 1, 1], + "event_id": [1, 2, 4, 1, 3, 5], + "value1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + } + ) + + table2 = Table( + { + "obs_id": [2, 2, 2, 1, 1, 1], + "event_id": [1, 2, 3, 1, 2, 4], + "value2": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + } + ) + + keys = ["obs_id", "event_id"] + + for join_type in ("left", "outer"): + assert_table_equal( + join_allow_empty(table1, Table(), join_type=join_type, keys=keys), table1 + ) + + for join_type in ("right", "inner"): + assert_table_equal( + join_allow_empty(table1, Table(), join_type=join_type, keys=keys), Table() + ) + + for join_type in ("left", "right", "inner", "outer"): + assert_table_equal( + join_allow_empty(Table(), Table(), join_type=join_type, keys=keys), Table() + ) + + result = join_allow_empty(table1, table2, keys=keys) + assert result.colnames == ["obs_id", "event_id", "value1", "value2"] + # join result will be ordered by the keys by default + np.testing.assert_equal(result["obs_id"], [1, 1, 1, 2, 2, 2]) + np.testing.assert_equal(result["event_id"], [1, 3, 5, 1, 2, 4]) + + # with keep order tests + for join_type in ("inner", "outer"): + with pytest.raises(ValueError, match="keep_order is only supported"): + join_allow_empty( + table1, table2, keys=keys, join_type=join_type, keep_order=True + ) + + for join_type, reference in zip(("left", "right"), (table1, table2)): + result = join_allow_empty( + table1, table2, keys=keys, join_type=join_type, keep_order=True + ) + np.testing.assert_equal(result["obs_id"], reference["obs_id"]) + np.testing.assert_equal(result["event_id"], reference["event_id"]) diff --git a/docs/changes/2481.feature.rst b/docs/changes/2481.feature.rst new file mode 100644 index 00000000000..124f8206c7b --- /dev/null +++ b/docs/changes/2481.feature.rst @@ -0,0 +1,4 @@ +Also load the new fixed pointing information in ``TableLoader``. + +Add option ``keep_order`` to ``ctapipe.io.astropy_helpers.join_allow_empty`` +that will keep the original order of rows when performing left or right joins. From 492b0b215553354a7ba0872d007b6e33dcc06b86 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 4 Jan 2024 13:23:46 +0100 Subject: [PATCH 3/3] Run pre-commit --- ctapipe/io/tableloader.py | 6 +----- ctapipe/io/tests/test_table_loader.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ctapipe/io/tableloader.py b/ctapipe/io/tableloader.py index a7c872d7578..98a446de23d 100644 --- a/ctapipe/io/tableloader.py +++ b/ctapipe/io/tableloader.py @@ -596,11 +596,7 @@ def _read_telescope_events_for_id( ) table = _join_telescope_events(table, impacts) - if ( - len(table) > 0 - and pointing - and FIXED_POINTING_GROUP in self.h5file.root - ): + if len(table) > 0 and pointing and FIXED_POINTING_GROUP in self.h5file.root: pointing = read_table( self.h5file, f"{FIXED_POINTING_GROUP}/tel_{tel_id:03d}" ) diff --git a/ctapipe/io/tests/test_table_loader.py b/ctapipe/io/tests/test_table_loader.py index 96e154b4567..97578a78395 100644 --- a/ctapipe/io/tests/test_table_loader.py +++ b/ctapipe/io/tests/test_table_loader.py @@ -272,10 +272,16 @@ def test_chunked(dl2_shower_geometry_file): stop = chunk_size with TableLoader(dl2_shower_geometry_file) as table_loader: - tel_event_it = table_loader.read_telescope_events_chunked(chunk_size=chunk_size, true_parameters=False) + tel_event_it = table_loader.read_telescope_events_chunked( + chunk_size=chunk_size, true_parameters=False + ) event_it = table_loader.read_subarray_events_chunked(chunk_size=chunk_size) - by_type_it = table_loader.read_telescope_events_by_type_chunked(chunk_size=chunk_size, true_parameters=False) - by_id_it = table_loader.read_telescope_events_by_id_chunked(chunk_size=chunk_size, true_parameters=False) + by_type_it = table_loader.read_telescope_events_by_type_chunked( + chunk_size=chunk_size, true_parameters=False + ) + by_id_it = table_loader.read_telescope_events_by_id_chunked( + chunk_size=chunk_size, true_parameters=False + ) iters = (event_it, tel_event_it, by_type_it, by_id_it)