From 034f910ebef15b395199292271a9930569ab7cb0 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 12:25:20 -0400 Subject: [PATCH 01/10] Minor changes for inelasticity, option for sampling pulses --- src/graphnet/data/constants.py | 2 ++ src/graphnet/models/graphs/nodes/nodes.py | 14 +++++++++++- src/graphnet/models/task/reconstruction.py | 26 ++++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/graphnet/data/constants.py b/src/graphnet/data/constants.py index 10ed4c66e..02a9e5a46 100644 --- a/src/graphnet/data/constants.py +++ b/src/graphnet/data/constants.py @@ -50,6 +50,8 @@ class TRUTH: "interaction_type", "interaction_time", # Added for vertex reconstruction "inelasticity", + "visible_inelasticity", + "visible_energy", "stopped_muon", ] DEEPCORE = ICECUBE86 diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 4e094e6be..2a0bebc02 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -325,6 +325,7 @@ def __init__( "z_offset": None, "z_scaling": None, }, + sample_pulses: bool = True, ) -> None: """Construct `IceMixNodes`. @@ -338,6 +339,9 @@ def __init__( ice in IceCube are added to the feature set based on z coordinate. ice_args: Offset and scaling of the z coordinate in the Detector, to be able to make similar conversion in the ice data. + sample_pulses: Enable sampling random pulses. If True and the + event is longer than the max_length, they will be sampled. If + False, then only the first max_length pulses will be selected. """ if input_feature_names is None: input_feature_names = [ @@ -383,6 +387,7 @@ def __init__( self.z_name = z_name self.hlc_name = hlc_name self.add_ice_properties = add_ice_properties + self.sampling_enabled = sample_pulses def _define_output_feature_names( self, input_feature_names: List[str] @@ -436,7 +441,14 @@ def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: x[:, self.feature_indexes[self.hlc_name]] = torch.logical_not( x[:, self.feature_indexes[self.hlc_name]] ) # hlc in kaggle was flipped - ids = self._pulse_sampler(x, event_length) + if self.sampling_enabled: + ids = self._pulse_sampler(x, event_length) + else: + if event_length < self.max_length: + ids = torch.arange(event_length) + else: + ids = torch.arange(self.max_length) + event_length = min(self.max_length, event_length) graph = torch.zeros([event_length, self.n_features]) diff --git a/src/graphnet/models/task/reconstruction.py b/src/graphnet/models/task/reconstruction.py index e9b3cdaa5..4f51a7f3a 100644 --- a/src/graphnet/models/task/reconstruction.py +++ b/src/graphnet/models/task/reconstruction.py @@ -111,7 +111,18 @@ def _forward(self, x: Tensor) -> Tensor: # Transform, thereby preventing overflow and underflow error. 
return torch.nn.functional.softplus(x, beta=0.05) + eps_like(x) +class PlainEnergyReconstruction(StandardLearnedTask): + """Reconstructs energy using stable method.""" + # Requires one feature: untransformed energy + default_target_labels = ["energy"] + default_prediction_labels = ["energy_pred"] + nb_inputs = 1 + + def _forward(self, x: Tensor) -> Tensor: + return x + + class EnergyReconstructionWithPower(StandardLearnedTask): """Reconstructs energy.""" @@ -231,3 +242,18 @@ class InelasticityReconstruction(StandardLearnedTask): def _forward(self, x: Tensor) -> Tensor: # Transform output to unit range return torch.sigmoid(x) + +class VisibleInelasticityReconstruction(StandardLearnedTask): + """Reconstructs interaction visible inelasticity. + + That is, 1-(visible track energy / visible hadronic energy). + """ + + # Requires one features: inelasticity itself + default_target_labels = ["visible_inelasticity"] + default_prediction_labels = ["visible_inelasticity_pred"] + nb_inputs = 1 + + def _forward(self, x: Tensor) -> Tensor: + # Transform output to unit range + return 0.5 * (torch.tanh(2.0 * x) + 1.0) \ No newline at end of file From 2767efa1a137a1909cd4448a89637375f9a25f60 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 12:56:15 -0400 Subject: [PATCH 02/10] Adding extra functionality for starting events --- .../extractors/icecube/i3truthextractor.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index b715e57ab..b1cf946d7 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -2,6 +2,7 @@ import numpy as np import matplotlib.path as mpath +from scipy.spatial import ConvexHull, Delaunay from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from .i3extractor import I3Extractor @@ -27,6 +28,7 @@ def __init__( name: str = "truth", borders: Optional[List[np.ndarray]] = None, mctree: Optional[str] = "I3MCTree", + extend_boundary: Optional[float] = 0.0, ): """Construct I3TruthExtractor. @@ -37,6 +39,8 @@ def __init__( stopping within the detector. Defaults to hard-coded boundary coordinates. mctree: Str of which MCTree to use for truth values. + extend_boundary: Float to extend the convex hull of the detector + for defining starting events. """ # Base class constructor super().__init__(name) @@ -78,6 +82,25 @@ def __init__( self._borders = [border_xy, border_z] else: self._borders = borders + + coordinates = [] + for omkey, g in self._gcd_dict.items(): + if g.position.z > 1200: continue # We want to exclude icetop + coordinates.append([g.position.x, g.position.y, g.position.z]) + hull = scipy.spatial.ConvexHull(np.array(coordinates)) + hull_points = coordinates[hull.vertices] + if extend_boundary > 0: + center = np.mean(hull_points, axis=0) + d = hull_points - center + norms = np.linalg.norm(d, axis=1, keepdims=True) + dn = d / norms + + extended_points = hull_points + dn * extend_boundary + hull = ConvexHull(extended_points) + + self.hull = hull + self.delaunay = Delaunay(coordinates[self.hull.vertices]) + self._mctree = mctree def __call__( @@ -119,6 +142,7 @@ def __call__( "L5_oscNext_bool": padding_value, "L6_oscNext_bool": padding_value, "L7_oscNext_bool": padding_value, + "is_starting": padding_value, } # Only InIceSplit P frames contain ML appropriate I3RecoPulseSeriesMap etc. 
@@ -224,6 +248,13 @@ def __call__( "stopped_muon": muon_final["stopped"], } ) + + starting = self._contained_vertex(output) + output.update( + { + "is_starting": starting, + } + ) return output @@ -438,3 +469,9 @@ def _find_data_type(self, mc: bool, input_file: str) -> str: else: sim_type = "NuGen" return sim_type + + def _contained_vertex(self, truth: Dict[str, Any]): + vertex = np.array( + [truth["position_x"], truth["position_y"], truth["position_z"]] + ) + return self.delaunay.find_simplex(vertex) >= 0 From edf0123da1b5d9792d3f0735a1d4216d29b4978f Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 12:56:43 -0400 Subject: [PATCH 03/10] Add trivial MAE loss --- src/graphnet/training/loss_functions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/graphnet/training/loss_functions.py b/src/graphnet/training/loss_functions.py index d3fc43f7e..45799b6d2 100644 --- a/src/graphnet/training/loss_functions.py +++ b/src/graphnet/training/loss_functions.py @@ -64,6 +64,11 @@ def _forward(self, prediction: Tensor, target: Tensor) -> Tensor: """Syntax like `.forward`, for implentation in inheriting classes.""" +class MAELoss(LossFunction): + def _forward(self, prediction: Tensor, target: Tensor) -> Tensor: + return torch.mean(torch.abs(prediction - target), dim=-1) + + class MSELoss(LossFunction): """Mean squared error loss.""" From ca3cc3c6b917971fbbccbda419254f406e2f6766 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 15:50:22 -0400 Subject: [PATCH 04/10] Improved convex hull calculation, added new filter --- .../extractors/icecube/i3truthextractor.py | 131 ++++++++++++++---- .../icecube/utilities/i3_filters.py | 13 ++ 2 files changed, 117 insertions(+), 27 deletions(-) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index b1cf946d7..cf6f7ce8f 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -17,6 +17,8 @@ dataclasses, icetray, phys_services, + dataio, + LeptonInjector, ) # pyright: reportMissingImports=false @@ -82,26 +84,77 @@ def __init__( self._borders = [border_xy, border_z] else: self._borders = borders - + + self._extend_boundary = extend_boundary + self._mctree = mctree + + def set_gcd(self, i3_file: str, gcd_file: Optional[str] = None) -> None: + """Extract GFrame and CFrame from i3/gcd-file pair. + + Information from these frames will be set as member variables of + `I3Extractor.` + + Args: + i3_file: Path to i3 file that is being converted. + gcd_file: Path to GCD file. Defaults to None. If no GCD file is + given, the method will attempt to find C and G frames in + the i3 file instead. If either one of those are not + present, `RuntimeErrors` will be raised. + """ + if gcd_file is None: + # If no GCD file is provided, search the I3 file for frames + # containing geometry (GFrame) and calibration (CFrame) + gcd = dataio.I3File(i3_file) + else: + # Ideally ends here + gcd = dataio.I3File(gcd_file) + + # Get GFrame + try: + g_frame = gcd.pop_frame(icetray.I3Frame.Geometry) + # If the line above fails, it means that no gcd file was given + # and that the i3 file does not have a G-Frame in it. + except RuntimeError as e: + self.error( + "No GCD file was provided " + f"and no G-frame was found in {i3_file.split('/')[-1]}." 
+ ) + raise e + + # Get CFrame + try: + c_frame = gcd.pop_frame(icetray.I3Frame.Calibration) + # If the line above fails, it means that no gcd file was given + # and that the i3 file does not have a C-Frame in it. + except RuntimeError as e: + self.warning( + "No GCD file was provided and no C-frame " + f"was found in {i3_file.split('/')[-1]}." + ) + raise e + + # Save information as member variables of I3Extractor + self._gcd_dict = g_frame["I3Geometry"].omgeo + self._calibration = c_frame["I3Calibration"] + coordinates = [] for omkey, g in self._gcd_dict.items(): if g.position.z > 1200: continue # We want to exclude icetop coordinates.append([g.position.x, g.position.y, g.position.z]) - hull = scipy.spatial.ConvexHull(np.array(coordinates)) - hull_points = coordinates[hull.vertices] - if extend_boundary > 0: - center = np.mean(hull_points, axis=0) - d = hull_points - center + coordinates = np.array(coordinates) + + if self._extend_boundary != 0.0: + print("Boundary: ", self._extend_boundary) + center = np.mean(coordinates, axis=0) + d = coordinates - center norms = np.linalg.norm(d, axis=1, keepdims=True) dn = d / norms - - extended_points = hull_points + dn * extend_boundary - hull = ConvexHull(extended_points) + coordinates = coordinates + dn * self._extend_boundary + + hull = ConvexHull(coordinates) self.hull = hull self.delaunay = Delaunay(coordinates[self.hull.vertices]) - - self._mctree = mctree def __call__( self, frame: "icetray.I3Frame", padding_value: Any = -1 @@ -109,7 +162,7 @@ def __call__( """Extract truth-level information.""" is_mc = frame_is_montecarlo(frame, self._mctree) is_noise = frame_is_noise(frame, self._mctree) - sim_type = self._find_data_type(is_mc, self._i3_file) + sim_type = self._find_data_type(is_mc, self._i3_file, frame) output = { "energy": padding_value, @@ -142,7 +195,7 @@ def __call__( "L5_oscNext_bool": padding_value, "L6_oscNext_bool": padding_value, "L7_oscNext_bool": padding_value, - "is_starting": padding_value, + "starting": padding_value, } # Only InIceSplit P frames contain ML appropriate I3RecoPulseSeriesMap etc. 
@@ -252,7 +305,7 @@ def __call__( starting = self._contained_vertex(output) output.update( { - "is_starting": starting, + "starting": starting, } ) @@ -399,15 +452,33 @@ def _get_primary_particle_interaction_type_and_elasticity( ] # For some strange reason the second entry is identical in all variables and has no nans (always muon) else: MCInIcePrimary = None - try: - interaction_type = frame["I3MCWeightDict"]["InteractionType"] - except KeyError: - interaction_type = padding_value - - try: - elasticity = frame["I3GENIEResultDict"]["y"] - except KeyError: - elasticity = padding_value + + if sim_type == "LeptonInjector": + event_properties = frame["EventProperties"] + + final_state_1 = event_properties.finalType1 + if final_state_1 in [dataclasses.I3Particle.NuE, + dataclasses.I3Particle.NuMu, + dataclasses.I3Particle.NuTau, + dataclasses.I3Particle.NuEBar, + dataclasses.I3Particle.NuMuBar, + dataclasses.I3Particle.NuTauBar]: + interaction_type = 2 # NC + else: + interaction_type = 1 # CC + + elasticity = 1 - event_properties.finalStateY + + else: + try: + interaction_type = frame["I3MCWeightDict"]["InteractionType"] + except KeyError: + interaction_type = padding_value + + try: + elasticity = 1 - frame["I3MCWeightDict"]["BjorkenY"] + except KeyError: + elasticity = padding_value return MCInIcePrimary, interaction_type, elasticity @@ -443,12 +514,15 @@ def _get_primary_track_energy_and_inelasticity( return energy_track, energy_cascade, inelasticity # Utility methods - def _find_data_type(self, mc: bool, input_file: str) -> str: + def _find_data_type(self, mc: bool, + input_file: str, + frame: "icetray.I3Frame") -> str: """Determine the data type. Args: mc: Whether `input_file` is Monte Carlo simulation. input_file: Path to I3-file. + frame: Physics frame containing MC record Returns: The simulation/data type. 
@@ -464,10 +538,13 @@ def _find_data_type(self, mc: bool, input_file: str) -> str: sim_type = "genie" elif "noise" in input_file: sim_type = "noise" - elif "L2" in input_file: # not robust - sim_type = "dbang" - else: + elif (frame.Has("EventProprties") or \ + frame.Has("LeptonInjectorProperties")): + sim_type = "LeptonInjector" + elif frame.Has("I3MCWeightDict"): sim_type = "NuGen" + else: + raise NotImplementedError("Could not determine data type.") return sim_type def _contained_vertex(self, truth: Dict[str, Any]): diff --git a/src/graphnet/data/extractors/icecube/utilities/i3_filters.py b/src/graphnet/data/extractors/icecube/utilities/i3_filters.py index ca83f4217..7f3050b5f 100644 --- a/src/graphnet/data/extractors/icecube/utilities/i3_filters.py +++ b/src/graphnet/data/extractors/icecube/utilities/i3_filters.py @@ -63,6 +63,19 @@ def _keep_frame(self, frame: "icetray.I3Frame") -> bool: return True +class SubEventStreamI3Filter(I3Filter): + """A filter that only keeps frames from select splits.""" + + def __init__(self, split_names: List[str]): + self._split_names = split_names + + def _keep_frame(self, frame: "icetray.I3Frame") -> bool: + if frame.Has("I3EventHeader"): + if frame["I3EventHeader"].sub_event_stream not in self._split_names: + return False + return True + + class I3FilterMask(I3Filter): """checks list of filters from the FilterMask in I3 frames.""" From 0cf1b03b87c5828e1b65d1a4e489649a291eb131 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 15:57:12 -0400 Subject: [PATCH 05/10] Fixes, remove prints --- .../extractors/icecube/i3truthextractor.py | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index cf6f7ce8f..2b14b81e9 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -84,10 +84,10 @@ def __init__( self._borders = [border_xy, border_z] else: self._borders = borders - + self._extend_boundary = extend_boundary self._mctree = mctree - + def set_gcd(self, i3_file: str, gcd_file: Optional[str] = None) -> None: """Extract GFrame and CFrame from i3/gcd-file pair. 
@@ -136,23 +136,23 @@ def set_gcd(self, i3_file: str, gcd_file: Optional[str] = None) -> None: # Save information as member variables of I3Extractor self._gcd_dict = g_frame["I3Geometry"].omgeo self._calibration = c_frame["I3Calibration"] - + coordinates = [] for omkey, g in self._gcd_dict.items(): - if g.position.z > 1200: continue # We want to exclude icetop + if g.position.z > 1200: + continue # We want to exclude icetop coordinates.append([g.position.x, g.position.y, g.position.z]) coordinates = np.array(coordinates) - + if self._extend_boundary != 0.0: - print("Boundary: ", self._extend_boundary) center = np.mean(coordinates, axis=0) d = coordinates - center norms = np.linalg.norm(d, axis=1, keepdims=True) dn = d / norms coordinates = coordinates + dn * self._extend_boundary - - hull = ConvexHull(coordinates) - + + hull = ConvexHull(coordinates) + self.hull = hull self.delaunay = Delaunay(coordinates[self.hull.vertices]) @@ -301,7 +301,7 @@ def __call__( "stopped_muon": muon_final["stopped"], } ) - + starting = self._contained_vertex(output) output.update( { @@ -452,29 +452,30 @@ def _get_primary_particle_interaction_type_and_elasticity( ] # For some strange reason the second entry is identical in all variables and has no nans (always muon) else: MCInIcePrimary = None - + if sim_type == "LeptonInjector": event_properties = frame["EventProperties"] - final_state_1 = event_properties.finalType1 - if final_state_1 in [dataclasses.I3Particle.NuE, - dataclasses.I3Particle.NuMu, - dataclasses.I3Particle.NuTau, - dataclasses.I3Particle.NuEBar, - dataclasses.I3Particle.NuMuBar, - dataclasses.I3Particle.NuTauBar]: + if final_state_1 in [ + dataclasses.I3Particle.NuE, + dataclasses.I3Particle.NuMu, + dataclasses.I3Particle.NuTau, + dataclasses.I3Particle.NuEBar, + dataclasses.I3Particle.NuMuBar, + dataclasses.I3Particle.NuTauBar, + ]: interaction_type = 2 # NC else: interaction_type = 1 # CC - + elasticity = 1 - event_properties.finalStateY - + else: try: interaction_type = frame["I3MCWeightDict"]["InteractionType"] except KeyError: - interaction_type = padding_value - + interaction_type = int(padding_value) + try: elasticity = 1 - frame["I3MCWeightDict"]["BjorkenY"] except KeyError: @@ -514,9 +515,9 @@ def _get_primary_track_energy_and_inelasticity( return energy_track, energy_cascade, inelasticity # Utility methods - def _find_data_type(self, mc: bool, - input_file: str, - frame: "icetray.I3Frame") -> str: + def _find_data_type( + self, mc: bool, input_file: str, frame: "icetray.I3Frame" + ) -> str: """Determine the data type. 
Args: @@ -538,16 +539,17 @@ def _find_data_type(self, mc: bool, sim_type = "genie" elif "noise" in input_file: sim_type = "noise" - elif (frame.Has("EventProprties") or \ - frame.Has("LeptonInjectorProperties")): + elif frame.Has("EventProprties") or frame.Has( + "LeptonInjectorProperties" + ): sim_type = "LeptonInjector" elif frame.Has("I3MCWeightDict"): sim_type = "NuGen" else: raise NotImplementedError("Could not determine data type.") return sim_type - - def _contained_vertex(self, truth: Dict[str, Any]): + + def _contained_vertex(self, truth: Dict[str, Any]) -> bool: vertex = np.array( [truth["position_x"], truth["position_y"], truth["position_z"]] ) From a05ce7fb7a9e8df184024d38e09124f856782505 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 16:00:59 -0400 Subject: [PATCH 06/10] Update docstrings --- .../extractors/icecube/i3truthextractor.py | 8 +++++++ .../icecube/utilities/i3_filters.py | 23 ++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index 2b14b81e9..8dfa574ea 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -550,6 +550,14 @@ def _find_data_type( return sim_type def _contained_vertex(self, truth: Dict[str, Any]) -> bool: + """Determine if an event is starting based on vertex position. + + Args: + truth: Dictionary of already extracted truth-level information. + + Returns: + True/False if vertex is inside detector. + """ vertex = np.array( [truth["position_x"], truth["position_y"], truth["position_z"]] ) diff --git a/src/graphnet/data/extractors/icecube/utilities/i3_filters.py b/src/graphnet/data/extractors/icecube/utilities/i3_filters.py index 7f3050b5f..eee9e73c8 100644 --- a/src/graphnet/data/extractors/icecube/utilities/i3_filters.py +++ b/src/graphnet/data/extractors/icecube/utilities/i3_filters.py @@ -65,16 +65,27 @@ def _keep_frame(self, frame: "icetray.I3Frame") -> bool: class SubEventStreamI3Filter(I3Filter): """A filter that only keeps frames from select splits.""" - - def __init__(self, split_names: List[str]): - self._split_names = split_names - + + def __init__(self, selection: List[str]): + """Initialize SubEventStreamI3Filter. + + Args: + selection: List of subevent streams to keep. + """ + self._selection = selection + def _keep_frame(self, frame: "icetray.I3Frame") -> bool: + """Check if current frame should be kept. + + Args: + frame: I3-frame + The I3-frame to check. 
+ """ if frame.Has("I3EventHeader"): - if frame["I3EventHeader"].sub_event_stream not in self._split_names: + if frame["I3EventHeader"].sub_event_stream not in self._selection: return False return True - + class I3FilterMask(I3Filter): """checks list of filters from the FilterMask in I3 frames.""" From b52540de9608b8819ff433ba45296e3acec2e130 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 1 Nov 2024 16:04:17 -0400 Subject: [PATCH 07/10] Remove an old test reconstruction task --- .../data/extractors/icecube/i3truthextractor.py | 2 +- src/graphnet/models/task/reconstruction.py | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index 8dfa574ea..9d75d4a2f 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -41,7 +41,7 @@ def __init__( stopping within the detector. Defaults to hard-coded boundary coordinates. mctree: Str of which MCTree to use for truth values. - extend_boundary: Float to extend the convex hull of the detector + extend_boundary: Distance to extend the convex hull of the detector for defining starting events. """ # Base class constructor diff --git a/src/graphnet/models/task/reconstruction.py b/src/graphnet/models/task/reconstruction.py index 4f51a7f3a..3b4fca932 100644 --- a/src/graphnet/models/task/reconstruction.py +++ b/src/graphnet/models/task/reconstruction.py @@ -111,18 +111,7 @@ def _forward(self, x: Tensor) -> Tensor: # Transform, thereby preventing overflow and underflow error. return torch.nn.functional.softplus(x, beta=0.05) + eps_like(x) -class PlainEnergyReconstruction(StandardLearnedTask): - """Reconstructs energy using stable method.""" - # Requires one feature: untransformed energy - default_target_labels = ["energy"] - default_prediction_labels = ["energy_pred"] - nb_inputs = 1 - - def _forward(self, x: Tensor) -> Tensor: - return x - - class EnergyReconstructionWithPower(StandardLearnedTask): """Reconstructs energy.""" @@ -243,6 +232,7 @@ def _forward(self, x: Tensor) -> Tensor: # Transform output to unit range return torch.sigmoid(x) + class VisibleInelasticityReconstruction(StandardLearnedTask): """Reconstructs interaction visible inelasticity. 
@@ -256,4 +246,4 @@ class VisibleInelasticityReconstruction(StandardLearnedTask): def _forward(self, x: Tensor) -> Tensor: # Transform output to unit range - return 0.5 * (torch.tanh(2.0 * x) + 1.0) \ No newline at end of file + return 0.5 * (torch.tanh(2.0 * x) + 1.0) From 3d0f71ffcb47c8fcd08c6fab7509e01af3ba7b99 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Wed, 6 Nov 2024 14:45:28 -0500 Subject: [PATCH 08/10] Formatting --- src/graphnet/data/extractors/icecube/i3truthextractor.py | 8 ++++---- src/graphnet/models/graphs/nodes/nodes.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index d29562c2a..ec64632ac 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -13,7 +13,7 @@ from graphnet.utilities.imports import has_icecube_package if has_icecube_package() or TYPE_CHECKING: - from icecube import ( + from icecube import ( # noqa: F401 dataclasses, icetray, phys_services, @@ -195,7 +195,7 @@ def __call__( "L5_oscNext_bool": padding_value, "L6_oscNext_bool": padding_value, "L7_oscNext_bool": padding_value, - "starting": padding_value, + "is_starting": padding_value, } # Only InIceSplit P frames contain ML appropriate @@ -307,10 +307,10 @@ def __call__( } ) - starting = self._contained_vertex(output) + is_starting = self._contained_vertex(output) output.update( { - "starting": starting, + "is_starting": is_starting, } ) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 3185828c4..e8f8d749d 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -449,7 +449,7 @@ def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: ids = torch.arange(event_length) else: ids = torch.arange(self.max_length) - + event_length = min(self.max_length, event_length) graph = torch.zeros([event_length, self.n_features]) From 6777498d9554ef237ea203f4b9b714b24bc9a3eb Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Wed, 6 Nov 2024 14:51:58 -0500 Subject: [PATCH 09/10] Add MAELoss docstrings --- src/graphnet/training/loss_functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/graphnet/training/loss_functions.py b/src/graphnet/training/loss_functions.py index 5e3364c8e..534d095eb 100644 --- a/src/graphnet/training/loss_functions.py +++ b/src/graphnet/training/loss_functions.py @@ -64,7 +64,10 @@ def _forward(self, prediction: Tensor, target: Tensor) -> Tensor: class MAELoss(LossFunction): + """Mean absolute error loss.""" + def _forward(self, prediction: Tensor, target: Tensor) -> Tensor: + """Implement loss calculation.""" return torch.mean(torch.abs(prediction - target), dim=-1) From be643366bffe93c8b79d7f2d982ed6319d693b51 Mon Sep 17 00:00:00 2001 From: Philip Weigel Date: Fri, 22 Nov 2024 15:29:28 -0500 Subject: [PATCH 10/10] Simplify i3truthextractor set_gcd --- .../extractors/icecube/i3truthextractor.py | 40 ++----------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/src/graphnet/data/extractors/icecube/i3truthextractor.py b/src/graphnet/data/extractors/icecube/i3truthextractor.py index ec64632ac..1c101def7 100644 --- a/src/graphnet/data/extractors/icecube/i3truthextractor.py +++ b/src/graphnet/data/extractors/icecube/i3truthextractor.py @@ -101,44 +101,12 @@ def set_gcd(self, i3_file: str, gcd_file: Optional[str] = None) -> None: the i3 
file instead. If either one of those are not present, `RuntimeErrors` will be raised. """ - if gcd_file is None: - # If no GCD file is provided, search the I3 file for frames - # containing geometry (GFrame) and calibration (CFrame) - gcd = dataio.I3File(i3_file) - else: - # Ideally ends here - gcd = dataio.I3File(gcd_file) - - # Get GFrame - try: - g_frame = gcd.pop_frame(icetray.I3Frame.Geometry) - # If the line above fails, it means that no gcd file was given - # and that the i3 file does not have a G-Frame in it. - except RuntimeError as e: - self.error( - "No GCD file was provided " - f"and no G-frame was found in {i3_file.split('/')[-1]}." - ) - raise e - - # Get CFrame - try: - c_frame = gcd.pop_frame(icetray.I3Frame.Calibration) - # If the line above fails, it means that no gcd file was given - # and that the i3 file does not have a C-Frame in it. - except RuntimeError as e: - self.warning( - "No GCD file was provided and no C-frame " - f"was found in {i3_file.split('/')[-1]}." - ) - raise e - - # Save information as member variables of I3Extractor - self._gcd_dict = g_frame["I3Geometry"].omgeo - self._calibration = c_frame["I3Calibration"] + super().set_gcd(i3_file=i3_file, gcd_file=gcd_file) + # Modifications specific to I3TruthExtractor + # These modifications are needed to identify starting events coordinates = [] - for omkey, g in self._gcd_dict.items(): + for _, g in self._gcd_dict.items(): if g.position.z > 1200: continue # We want to exclude icetop coordinates.append([g.position.x, g.position.y, g.position.z])
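
Taken together, the series adds a convex-hull-based `is_starting` truth flag with an optional `extend_boundary` margin and LeptonInjector handling to `I3TruthExtractor`, a `SubEventStreamI3Filter` for keeping selected sub-event streams, a `sample_pulses` switch on `IceMixNodes`, and a `VisibleInelasticityReconstruction` task alongside a plain `MAELoss`. The snippet below is an illustrative sketch of how these new options could be combined; the import paths are inferred from the file paths touched by the patches, and the concrete values (boundary distance, split name, hidden size) are placeholders rather than recommendations.

    # Sketch only: assumes a graphnet installation with these patches applied.
    from graphnet.data.extractors.icecube.i3truthextractor import I3TruthExtractor
    from graphnet.data.extractors.icecube.utilities.i3_filters import (
        SubEventStreamI3Filter,
    )
    from graphnet.models.graphs.nodes.nodes import IceMixNodes
    from graphnet.models.task.reconstruction import VisibleInelasticityReconstruction
    from graphnet.training.loss_functions import MAELoss

    # Truth extraction: extend the detector convex hull by 100 m (placeholder)
    # before testing whether the interaction vertex is contained, which fills
    # the new `is_starting` truth field.
    truth_extractor = I3TruthExtractor(extend_boundary=100.0)

    # Keep only frames from the InIceSplit sub-event stream (example selection).
    stream_filter = SubEventStreamI3Filter(selection=["InIceSplit"])

    # Node definition: with sampling disabled, events longer than the maximum
    # pulse count are truncated to the first pulses instead of randomly sampled.
    node_definition = IceMixNodes(sample_pulses=False)

    # Task: regresses the new `visible_inelasticity` truth label (its default
    # target), squashing the network output into [0, 1] via 0.5*(tanh(2x)+1),
    # here paired with the new mean-absolute-error loss. `hidden_size` must
    # match the latent size of the upstream model; 128 is a placeholder.
    task = VisibleInelasticityReconstruction(
        hidden_size=128,
        loss_function=MAELoss(),
    )

    # These objects would then be passed to graphnet's usual data-conversion
    # and model-building pipelines, which are unchanged by this series and
    # omitted here.

Because the task maps its output through 0.5 * (tanh(2x) + 1), predictions are confined to the unit interval, which pairs naturally with a bounded target such as visible inelasticity and with the absolute-error loss added in patch 03.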