Skip to content

Commit

Permalink
Merge branch 'development' of https://github.com/openclimatefix/ocf_dataset_alpha into development
Browse files Browse the repository at this point in the history
  • Loading branch information
dfulu committed Aug 13, 2024
2 parents fa0cc63 + 1225963 commit ae3a57a
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 19 deletions.
7 changes: 4 additions & 3 deletions ocf_data_sampler/select/find_contiguous_time_periods.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ def find_contiguous_t0_periods_nwp(

for dt_init in datetimes[1:]:
# If the previous init time becomes stale before the next init becomes valid whilst also
# considering dropout and the need for a historic period - then the contiguous period breaks
if end_this_period < dt_init + hist_drop_buffer:
# considering dropout - then the contiguous period breaks and a new one starts, taking dropout
# and history duration into account
if end_this_period < dt_init + max_dropout:
contiguous_periods += [[start_this_period, end_this_period]]

# And start a new period
Expand Down Expand Up @@ -278,4 +279,4 @@ def intersection_of_2_dataframes_of_periods(a: pd.DataFrame, b: pd.DataFrame) ->
all_intersecting_periods.append(intersection)

all_intersecting_periods = pd.concat(all_intersecting_periods)
return all_intersecting_periods.sort_values(by="start_dt").reset_index(drop=True)
return all_intersecting_periods.sort_values(by="start_dt").reset_index(drop=True)
41 changes: 25 additions & 16 deletions tests/select/test_find_contiguous_time_periods.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,11 @@ def test_find_contiguous_t0_periods_nwp():
[
"2023-01-01 05:00",
"2023-01-02 05:00",
"2023-01-02 14:00",
]
),
"end_dt": pd.to_datetime(
[
"2023-01-01 21:00",
"2023-01-02 12:00",
"2023-01-03 06:00",
]
),
Expand All @@ -79,14 +77,12 @@ def test_find_contiguous_t0_periods_nwp():
"start_dt": pd.to_datetime(
[
"2023-01-01 05:00",
"2023-01-01 11:00",
"2023-01-02 05:00",
"2023-01-02 14:00",
]
),
"end_dt": pd.to_datetime(
[
"2023-01-01 09:00",
"2023-01-01 18:00",
"2023-01-02 09:00",
"2023-01-03 03:00",
Expand All @@ -100,28 +96,40 @@ def test_find_contiguous_t0_periods_nwp():
[
"2023-01-01 05:00",
"2023-01-01 11:00",
"2023-01-01 14:00",
"2023-01-02 05:00",
"2023-01-02 14:00",
"2023-01-02 17:00",
"2023-01-02 20:00",
"2023-01-02 23:00",
]
),
"end_dt": pd.to_datetime(
[
"2023-01-01 06:00",
"2023-01-01 12:00",
"2023-01-01 15:00",
"2023-01-02 06:00",
"2023-01-02 15:00",
"2023-01-02 18:00",
"2023-01-02 21:00",
"2023-01-03 00:00",
]
),
},
),
pd.DataFrame(
{
"start_dt": pd.to_datetime(
[
"2023-01-01 06:00",
"2023-01-01 12:00",
"2023-01-02 06:00",
"2023-01-02 15:00",
]
),
"end_dt": pd.to_datetime(
[
"2023-01-01 09:00",
"2023-01-01 18:00",
"2023-01-02 09:00",
"2023-01-03 03:00",
]
),
},
),
]

# Create 3-hourly init times with a few time stamps missing
Expand All @@ -131,21 +139,22 @@ def test_find_contiguous_t0_periods_nwp():
pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq)
.delete([1, 4, 5, 6, 7, 9, 10])
)
steps = pd.to_timedelta(range(24), unit="h")

# Choose some history durations, max stalenesses and max dropouts
history_durations_hr = [0, 2, 2, 2]
max_stalenesses_hr = [9, 9, 6, 3]
history_durations_hr = [0, 2, 2, 2, 2]
max_stalenesses_hr = [9, 9, 6, 3, 6]
max_dropouts_hr = [0, 0, 0, 0, 3]

for i in range(len(expected_results)):
history_duration = pd.Timedelta(history_durations_hr[i], "h")
max_staleness = pd.Timedelta(max_stalenesses_hr[i], "h")
max_dropout = pd.Timedelta(max_dropouts_hr[i], "h")

time_periods = find_contiguous_t0_periods_nwp(
datetimes=datetimes,
history_duration=history_duration,
max_staleness=max_staleness,
max_dropout = pd.Timedelta(0),
max_dropout=max_dropout,
)

# Check if results are as expected
Expand Down

0 comments on commit ae3a57a

Please sign in to comment.