Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mda8 misalignment #1322

Merged
merged 6 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pyaerocom/stats/mda8/mda8.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,11 @@ def _calc_mda8(data: xr.DataArray) -> xr.DataArray:


def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray:
return arr.rolling(time=8, min_periods=6).mean()
# Xarray labels the data right based on the last data point in the period for rolling.
return arr.rolling(time=8, center=False, min_periods=6).mean()


def _daily_max(arr: xr.DataArray) -> xr.DataArray:
return arr.resample(time="24H", offset="1h").reduce(
return arr.resample(time="24H", origin="start_day", label="left", offset="1h").reduce(
lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18)
)
38 changes: 28 additions & 10 deletions tests/stats/mda8/test_mda8.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def test_data(time, values) -> xr.DataArray:
[np.nan] * 3,
id="with-nans",
),
# https://github.com/metno/pyaerocom/issues/1323
pytest.param(
xr.date_range(start="2024-01-01 06:00:00", periods=30, freq="1h"),
np.arange(30),
[np.nan, np.nan],
id="#1323",
),
),
)
def test_calc_mda8(test_data, exp_mda8):
Expand Down Expand Up @@ -90,16 +97,7 @@ def test_coldata_to_mda8(coldata):
assert mda8.shape == (2, 8, 1)

assert mda8.data.values[0, :, 0] == pytest.approx(
[
np.nan,
np.nan,
1.18741556,
1.18777241,
1.18869106,
1.18879322,
1.18807846,
1.18700801,
],
[np.nan, np.nan, 1.18741556, 1.18777241, 1.18869106, 1.18879322, 1.18807846, 1.18700801],
abs=10**-5,
nan_ok=True,
)
Expand Down Expand Up @@ -154,6 +152,26 @@ def test_rollingaverage(test_data, exp_ravg):
assert all(ravg[0, :, 0] == pytest.approx(exp_ravg, abs=0, nan_ok=True))


def test_rollingaverage_label():
"""
Checks that the labels of rolling average is correct (we want it to be labeled based
on the "right-most" interval, ie. latest measurement in the interval). This is currently
the case in xarray but not greatly documented, so this test checks for that.

https://github.com/metno/pyaerocom/issues/1323
"""
data = xr.DataArray(
[[[x] for x in range(24)]],
dims=["data_source", "time", "station_name"],
coords={"time": xr.date_range(start="2024-01-01 00:00", periods=24, freq="1h")},
)

ravg = _rolling_average_8hr(data)

assert ravg["time"].isel(time=0) == np.datetime64("2024-01-01 00:00")
assert ravg["time"].isel(time=23) == np.datetime64("2024-01-01 23:00")


@pytest.mark.parametrize(
"time,values,exp_daily_max",
(
Expand Down