From f84c9c0d641531c87b575649e36c57f8fec34e67 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:11:41 +0100 Subject: [PATCH 1/3] only account for dropout in find_contiguous_t0_periods_nwp after first dt_init --- .../select/find_contiguous_t0_time_periods.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ocf_data_sampler/select/find_contiguous_t0_time_periods.py b/ocf_data_sampler/select/find_contiguous_t0_time_periods.py index d8184ac..013c300 100644 --- a/ocf_data_sampler/select/find_contiguous_t0_time_periods.py +++ b/ocf_data_sampler/select/find_contiguous_t0_time_periods.py @@ -189,12 +189,12 @@ def find_contiguous_t0_periods_nwp( for dt_init in datetimes[1:]: # If the previous init time becomes stale before the next init becomes valid whilst also - # considering dropout and the need for a historic period - then the contiguous period breaks - if end_this_period < dt_init + hist_drop_buffer: + # considering dropout - then the contiguous period breaks + if end_this_period < dt_init + max_dropout: contiguous_periods += [[start_this_period, end_this_period]] # And start a new period - start_this_period = dt_init + hist_drop_buffer + start_this_period = dt_init + max_dropout end_this_period = dt_init + max_staleness contiguous_periods += [[start_this_period, end_this_period]] @@ -278,4 +278,4 @@ def intersection_of_2_dataframes_of_periods(a: pd.DataFrame, b: pd.DataFrame) -> all_intersecting_periods.append(intersection) all_intersecting_periods = pd.concat(all_intersecting_periods) - return all_intersecting_periods.sort_values(by="start_dt").reset_index(drop=True) \ No newline at end of file + return all_intersecting_periods.sort_values(by="start_dt").reset_index(drop=True) From e5cb0ab95ac24ce331040802718a67becf4f790b Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:53:08 +0100 Subject: 
[PATCH 2/3] account for history when beginning new period in find_contiguous_t0_periods_nwp --- ocf_data_sampler/select/find_contiguous_t0_time_periods.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ocf_data_sampler/select/find_contiguous_t0_time_periods.py b/ocf_data_sampler/select/find_contiguous_t0_time_periods.py index 013c300..2fcb2ff 100644 --- a/ocf_data_sampler/select/find_contiguous_t0_time_periods.py +++ b/ocf_data_sampler/select/find_contiguous_t0_time_periods.py @@ -189,12 +189,13 @@ def find_contiguous_t0_periods_nwp( for dt_init in datetimes[1:]: # If the previous init time becomes stale before the next init becomes valid whilst also - # considering dropout - then the contiguous period breaks + # considering dropout - then the contiguous period breaks, and a new one starts, accounting + # for dropout and history duration if end_this_period < dt_init + max_dropout: contiguous_periods += [[start_this_period, end_this_period]] # And start a new period - start_this_period = dt_init + max_dropout + start_this_period = dt_init + hist_drop_buffer end_this_period = dt_init + max_staleness contiguous_periods += [[start_this_period, end_this_period]] From 1225963e809f4b23563e4e72e04dbd588f714c94 Mon Sep 17 00:00:00 2001 From: AUdaltsova Date: Tue, 13 Aug 2024 11:01:20 +0100 Subject: [PATCH 3/3] update test_find_contiguous_t0_periods_nwp --- .../test_find_contiguous_t0_time_periods.py | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/tests/select/test_find_contiguous_t0_time_periods.py b/tests/select/test_find_contiguous_t0_time_periods.py index f8499fb..cb65767 100644 --- a/tests/select/test_find_contiguous_t0_time_periods.py +++ b/tests/select/test_find_contiguous_t0_time_periods.py @@ -62,13 +62,11 @@ def test_find_contiguous_t0_time_periods_nwp(): [ "2023-01-01 05:00", "2023-01-02 05:00", - "2023-01-02 14:00", ] ), "end_dt": pd.to_datetime( [ "2023-01-01 21:00", - "2023-01-02 12:00", "2023-01-03 
06:00", ] ), @@ -79,14 +77,12 @@ def test_find_contiguous_t0_time_periods_nwp(): "start_dt": pd.to_datetime( [ "2023-01-01 05:00", - "2023-01-01 11:00", "2023-01-02 05:00", "2023-01-02 14:00", ] ), "end_dt": pd.to_datetime( [ - "2023-01-01 09:00", "2023-01-01 18:00", "2023-01-02 09:00", "2023-01-03 03:00", @@ -100,28 +96,40 @@ def test_find_contiguous_t0_time_periods_nwp(): [ "2023-01-01 05:00", "2023-01-01 11:00", - "2023-01-01 14:00", "2023-01-02 05:00", "2023-01-02 14:00", - "2023-01-02 17:00", - "2023-01-02 20:00", - "2023-01-02 23:00", ] ), "end_dt": pd.to_datetime( [ "2023-01-01 06:00", - "2023-01-01 12:00", "2023-01-01 15:00", "2023-01-02 06:00", - "2023-01-02 15:00", - "2023-01-02 18:00", - "2023-01-02 21:00", "2023-01-03 00:00", ] ), }, ), + pd.DataFrame( + { + "start_dt": pd.to_datetime( + [ + "2023-01-01 06:00", + "2023-01-01 12:00", + "2023-01-02 06:00", + "2023-01-02 15:00", + ] + ), + "end_dt": pd.to_datetime( + [ + "2023-01-01 09:00", + "2023-01-01 18:00", + "2023-01-02 09:00", + "2023-01-03 03:00", + ] + ), + }, + ), ] # Create 3-hourly init times with a few time stamps missing @@ -131,21 +139,22 @@ def test_find_contiguous_t0_time_periods_nwp(): pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq) .delete([1, 4, 5, 6, 7, 9, 10]) ) - steps = pd.to_timedelta(range(24), unit="h") # Choose some history durations and max stalenesses - history_durations_hr = [0, 2, 2, 2] - max_stalenesses_hr = [9, 9, 6, 3] + history_durations_hr = [0, 2, 2, 2, 2] + max_stalenesses_hr = [9, 9, 6, 3, 6] + max_dropouts_hr = [0, 0, 0, 0, 3] for i in range(len(expected_results)): history_duration = pd.Timedelta(history_durations_hr[i], "h") max_staleness = pd.Timedelta(max_stalenesses_hr[i], "h") + max_dropout = pd.Timedelta(max_dropouts_hr[i], "h") time_periods = find_contiguous_t0_periods_nwp( datetimes=datetimes, history_duration=history_duration, max_staleness=max_staleness, - max_dropout = pd.Timedelta(0), + max_dropout=max_dropout, ) # Check if results are 
as expected