Add Steve's suggestions

xCDAT · Feb 16, 2024 · b7ccbaa · b7ccbaa
1 parent fb0f80e
commit b7ccbaa
Show file tree

Hide file tree

Showing 2 changed files with 79 additions and 12 deletions.
diff --git a/xcdat/bounds.py b/xcdat/bounds.py
@@ -329,10 +329,11 @@ def add_time_bounds(
     ) -> xr.Dataset:
         """Add bounds for an axis using its coordinate points.
 
-        This method general assumes data with time frequencies of annual,
-        monthly, daily, or sub-daily. It loops over the time axis coordinate
-        variables and attempts to add bounds for each of them if they don't
-        exist.
+        This method is designed to operate on time axes that have constant temporal
+        resolution with annual, monthly, daily, or sub-daily time frequencies.
+        Alternate frequencies (e.g., pentad) are not supported. It loops over
+        the time axis coordinate variables and attempts to add bounds for each
+        of them if they don't exist.
 
         To add time bounds for the time axis, its coordinates must meet the
         following criteria:

diff --git a/xcdat/temporal.py b/xcdat/temporal.py
@@ -188,8 +188,11 @@ def average(self, data_var: str, weighted: bool = True, keep_weights: bool = Fal
             The weight of masked (missing) data is excluded when averages are
             taken. This is the same as giving them a weight of 0.
 
-            Warning: If one time point spans across the time intervals that 
-            you are averaging into, then weights are not properly assigned.
+            Note that weights are assigned by the labeled time point. If the
+            dataset includes timepoints that span across typical boundaries
+            (e.g., a timepoint on 2020-06-01 with bounds that begin in May 2020
+            and end in June 2020), the weights will not be assigned properly.
+            See explanation in the Notes section below.
         keep_weights : bool, optional
             If calculating averages using weights, keep the weights in the
             final dataset output, by default False.
@@ -200,6 +203,20 @@ def average(self, data_var: str, weighted: bool = True, keep_weights: bool = Fal
             Dataset with the average of the data variable and the time dimension
             removed.
 
+        Notes
+        -----
+        When using weighted averages, the weights are assigned based on the
+        timepoint value. For example, a time point of 2020-06-15 with bounds
+        (2020-06-01, 2020-06-30) has 30 days of weight assigned to June, 2020
+        (e.g., for an annual average calculation). This would be expected
+        behavior, but it's possible that data could span across typical temporal
+        boundaries. For example, a time point of 2020-06-01 with bounds
+        (2020-05-16, 2020-06-15) would have 30 days of weight, but this weight
+        would be assigned to June, 2020, which would be incorrect (15 days of
+        weight should be assigned to May and 15 days of weight should be
+        assigned to June). This issue could plausibly arise when using pentad
+        data.
+
         Examples
         --------
 
@@ -256,8 +273,11 @@ def group_average(
             The weight of masked (missing) data is excluded when averages are
             calculated. This is the same as giving them a weight of 0.
 
-            Warning: If one time point spans across the time intervals that 
-            you are averaging into, then weights are not properly assigned.
+            Note that weights are assigned by the labeled time point. If the
+            dataset includes timepoints that span across typical boundaries
+            (e.g., a timepoint on 2020-06-01 with bounds that begin in May 2020
+            and end in June 2020), the weights will not be assigned properly.
+            See explanation in the Notes section below.
         keep_weights : bool, optional
             If calculating averages using weights, keep the weights in the
             final dataset output, by default False.
@@ -306,6 +326,20 @@ def group_average(
         xr.Dataset
             Dataset with the average of a data variable by time group.
 
+        Notes
+        -----
+        When using weighted averages, the weights are assigned based on the
+        timepoint value. For example, a time point of 2020-06-15 with bounds
+        (2020-06-01, 2020-06-30) has 30 days of weight assigned to June, 2020
+        (e.g., for an annual average calculation). This would be expected
+        behavior, but it's possible that data could span across typical temporal
+        boundaries. For example, a time point of 2020-06-01 with bounds
+        (2020-05-16, 2020-06-15) would have 30 days of weight, but this weight
+        would be assigned to June, 2020, which would be incorrect (15 days of
+        weight should be assigned to May and 15 days of weight should be
+        assigned to June). This issue could plausibly arise when using pentad
+        data.
+
         Examples
         --------
 
@@ -409,8 +443,11 @@ def climatology(
             The weight of masked (missing) data is excluded when averages are
             taken. This is the same as giving them a weight of 0.
 
-            Warning: If one time point spans across the time intervals that 
-            you are averaging into, then weights are not properly assigned.
+            Note that weights are assigned by the labeled time point. If the
+            dataset includes timepoints that span across typical boundaries
+            (e.g., a timepoint on 2020-06-01 with bounds that begin in May 2020
+            and end in June 2020), the weights will not be assigned properly.
+            See explanation in the Notes section below.
         keep_weights : bool, optional
             If calculating averages using weights, keep the weights in the
             final dataset output, by default False.
@@ -469,6 +506,20 @@ def climatology(
         ----------
         .. [1] https://github.com/xCDAT/xcdat/discussions/332
 
+        Notes
+        -----
+        When using weighted averages, the weights are assigned based on the
+        timepoint value. For example, a time point of 2020-06-15 with bounds
+        (2020-06-01, 2020-06-30) has 30 days of weight assigned to June, 2020
+        (e.g., for an annual average calculation). This would be expected
+        behavior, but it's possible that data could span across typical temporal
+        boundaries. For example, a time point of 2020-06-01 with bounds
+        (2020-05-16, 2020-06-15) would have 30 days of weight, but this weight
+        would be assigned to June, 2020, which would be incorrect (15 days of
+        weight should be assigned to May and 15 days of weight should be
+        assigned to June). This issue could plausibly arise when using pentad
+        data.
+
         Examples
         --------
 
@@ -580,8 +631,11 @@ def departures(
             The weight of masked (missing) data is excluded when averages are
             taken. This is the same as giving them a weight of 0.
 
-            Warning: If one time point spans across the time intervals that 
-            you are averaging into, then weights are not properly assigned.
+            Note that weights are assigned by the labeled time point. If the
+            dataset includes timepoints that span across typical boundaries
+            (e.g., a timepoint on 2020-06-01 with bounds that begin in May 2020
+            and end in June 2020), the weights will not be assigned properly.
+            See explanation in the Notes section below.
         keep_weights : bool, optional
             If calculating averages using weights, keep the weights in the
             final dataset output, by default False.
@@ -639,6 +693,18 @@ def departures(
 
         Notes
         -----
+        When using weighted averages, the weights are assigned based on the
+        timepoint value. For example, a time point of 2020-06-15 with bounds
+        (2020-06-01, 2020-06-30) has 30 days of weight assigned to June, 2020
+        (e.g., for an annual average calculation). This would be expected
+        behavior, but it's possible that data could span across typical temporal
+        boundaries. For example, a time point of 2020-06-01 with bounds
+        (2020-05-16, 2020-06-15) would have 30 days of weight, but this weight
+        would be assigned to June, 2020, which would be incorrect (15 days of
+        weight should be assigned to May and 15 days of weight should be
+        assigned to June). This issue could plausibly arise when using pentad
+        data.
+
         This method uses xarray's grouped arithmetic as a shortcut for mapping
         over all unique labels. Grouped arithmetic works by assigning a grouping
         label to each time coordinate of the observation data based on the