From f08893760b1626493027440c9b4d4f329425ec37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:01:44 +0000 Subject: [PATCH] Undo right-shifting --- pyaerocom/stats/mda8/mda8.py | 9 ++------- tests/stats/mda8/test_mda8.py | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pyaerocom/stats/mda8/mda8.py b/pyaerocom/stats/mda8/mda8.py index 88e118f99..99ce4bbe4 100644 --- a/pyaerocom/stats/mda8/mda8.py +++ b/pyaerocom/stats/mda8/mda8.py @@ -100,13 +100,8 @@ def _calc_mda8(data: xr.DataArray) -> xr.DataArray: def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray: - # Xarray labels the data left, while we want it to be labeled right, so we add 8h to - # the time index. - new_arr = arr.rolling(time=8, min_periods=6).mean() - # Xarray labels the data left, while we want it to be labeled right, so we add 8h to - # the time index. - new_arr["time"] = new_arr.get_index("time") + pd.Timedelta("8h") - return new_arr + # Xarray labels the data right based on the last data point in the period for rolling. + return arr.rolling(time=8, center=False, min_periods=6).mean() def _daily_max(arr: xr.DataArray) -> xr.DataArray: diff --git a/tests/stats/mda8/test_mda8.py b/tests/stats/mda8/test_mda8.py index 707a727d2..d81fb8294 100644 --- a/tests/stats/mda8/test_mda8.py +++ b/tests/stats/mda8/test_mda8.py @@ -1,7 +1,6 @@ import numpy as np import pytest import xarray as xr -import pandas as pd from pyaerocom.colocation.colocated_data import ColocatedData from pyaerocom.stats.mda8.mda8 import ( @@ -27,13 +26,13 @@ def test_data(time, values) -> xr.DataArray: pytest.param( xr.date_range(start="2024-01-01 01:00", periods=49, freq="1h"), [0.0] * 49, - [np.nan, 0, np.nan], + [0, 0, np.nan], id="zeros", ), pytest.param( xr.date_range(start="2024-01-01 01:00", periods=49, freq="1h"), np.linspace(start=1, stop=49, num=49), - [np.nan, 36.5, np.nan], + [20.5, 44.5, np.nan], id="incrementing-by-1", ), pytest.param( @@ -54,7 +53,7 @@ def test_data(time, values) -> xr.DataArray: pytest.param( xr.date_range(start="2024-01-01 06:00:00", periods=30, freq="1h"), np.arange(30), - [np.nan, 25.5], + [np.nan, np.nan], id="#1323", ), ), @@ -98,17 +97,17 @@ def test_coldata_to_mda8(coldata): assert mda8.shape == (2, 8, 1) assert mda8.data.values[0, :, 0] == pytest.approx( - [np.nan, np.nan, 1.18853785, 1.18604125, 1.18869106, 1.18879322, 1.18807846, 1.18700801], + [np.nan, np.nan, 1.18741556, 1.18777241, 1.18869106, 1.18879322, 1.18807846, 1.18700801], abs=10**-5, nan_ok=True, ) assert mda8.data.values[1, :, 0] == pytest.approx( [ - np.nan, + 1.57327333, 1.28884431, - 1.28853785, - 1.28604125, + 1.28741556, + 1.28777241, 1.28869106, 1.28879322, 1.28807846, @@ -156,7 +155,8 @@ def test_rollingaverage(test_data, exp_ravg): def test_rollingaverage_label(): """ Checks that the labels of rolling average is correct (we want it to be labeled based - on the "right-most" interval, ie. latest measurement in the interval). + on the "right-most" interval, ie. latest measurement in the interval). This is currently + the case in xarray but not greatly documented, so this test checks for that. https://github.com/metno/pyaerocom/issues/1323 """ @@ -169,8 +169,7 @@ def test_rollingaverage_label(): ravg = _rolling_average_8hr(data) assert np.all( - ravg.get_index("time") - == xr.date_range(start="2024-01-01 00:00", periods=24, freq="1h") + pd.Timedelta("8h") + ravg.get_index("time") == xr.date_range(start="2024-01-01 00:00", periods=24, freq="1h") )