diff --git a/nbs/common.base_windows.ipynb b/nbs/common.base_windows.ipynb
index 90635d391..7dc7a5546 100644
--- a/nbs/common.base_windows.ipynb
+++ b/nbs/common.base_windows.ipynb
@@ -103,6 +103,7 @@
" windows_batch_size,\n",
" inference_windows_batch_size,\n",
" start_padding_enabled,\n",
+ " data_availability_threshold=0.0,\n",
" step_size=1,\n",
" num_lr_decays=0,\n",
" early_stop_patience_steps=-1,\n",
@@ -146,6 +147,7 @@
" self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0)\n",
" else:\n",
" self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)\n",
+ " self.data_availability_threshold = data_availability_threshold\n",
"\n",
" # Batch sizes\n",
" self.batch_size = batch_size\n",
@@ -221,11 +223,11 @@
" available_idx = temporal_cols.get_loc('available_mask')\n",
" available_condition = windows[:, :self.input_size, available_idx]\n",
" available_condition = torch.sum(available_condition, axis=1)\n",
- " final_condition = (available_condition > 0)\n",
+ " final_condition = (available_condition > self.data_availability_threshold * self.input_size)\n",
" if self.h > 0:\n",
" sample_condition = windows[:, self.input_size:, available_idx]\n",
" sample_condition = torch.sum(sample_condition, axis=1)\n",
- " final_condition = (sample_condition > 0) & (available_condition > 0)\n",
+ " final_condition = (sample_condition > self.data_availability_threshold * self.h) & (available_condition > self.data_availability_threshold * self.input_size)\n",
" windows = windows[final_condition]\n",
"\n",
" # Parse Static data to match windows\n",
@@ -880,7 +882,39 @@
"id": "bf493ff9",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "#| hide\n",
+ "# Test that data_availability_threshold filters windows with low data availability\n",
+ "\n",
+ "# Mark every odd-indexed data point as unavailable (available_mask = 0)\n",
+ "AirPassengersDF['available_mask'] = [1 if i % 2 == 0 else 0 for i in range(len(AirPassengersDF))]\n",
+ "dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=AirPassengersDF)\n",
+ "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n",
+ "\n",
+ "train_loader = data.train_dataloader()\n",
+ "batch = next(iter(train_loader))\n",
+ "\n",
+ "basewindows = BaseWindows(h=12,\n",
+ " input_size=24,\n",
+ " hist_exog_list=['x', 'x2'],\n",
+ " futr_exog_list=['x'],\n",
+ " data_availability_threshold=0.8,\n",
+ " loss=MAE(),\n",
+ " valid_loss=MAE(),\n",
+ " learning_rate=0.001,\n",
+ " max_steps=1,\n",
+ " val_check_steps=0,\n",
+ " batch_size=1,\n",
+ " valid_batch_size=1,\n",
+ " windows_batch_size=10,\n",
+ " inference_windows_batch_size=2, \n",
+ " start_padding_enabled=False)\n",
+ "\n",
+ "try:\n",
+ " basewindows._create_windows(batch, step='train')\n",
+ "except Exception as e:\n",
+ " assert str(e) == \"No windows available for training\""
+ ]
}
],
"metadata": {
diff --git a/nbs/models.autoformer.ipynb b/nbs/models.autoformer.ipynb
index 422a17ce2..64b956b48 100644
--- a/nbs/models.autoformer.ipynb
+++ b/nbs/models.autoformer.ipynb
@@ -483,6 +483,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
" `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
@@ -532,6 +533,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -560,6 +562,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled = start_padding_enabled,\n",
+ " data_availability_threshold = data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.bitcn.ipynb b/nbs/models.bitcn.ipynb
index 63582903a..580c3bd4d 100644
--- a/nbs/models.bitcn.ipynb
+++ b/nbs/models.bitcn.ipynb
@@ -166,6 +166,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
@@ -206,6 +207,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -234,6 +236,7 @@
" valid_batch_size=valid_batch_size,\n",
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
diff --git a/nbs/models.deepar.ipynb b/nbs/models.deepar.ipynb
index 7b32b6ac1..92e4d76de 100644
--- a/nbs/models.deepar.ipynb
+++ b/nbs/models.deepar.ipynb
@@ -177,6 +177,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n",
diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb
index 58b29d453..39c8870c5 100644
--- a/nbs/models.deepnpts.ipynb
+++ b/nbs/models.deepnpts.ipynb
@@ -122,6 +122,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n",
@@ -167,6 +168,7 @@
" windows_batch_size: int = 1024,\n",
" inference_windows_batch_size: int = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold: float = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'standard',\n",
" random_seed: int = 1,\n",
@@ -206,6 +208,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.dlinear.ipynb b/nbs/models.dlinear.ipynb
index 744a1823f..f3308a4fc 100644
--- a/nbs/models.dlinear.ipynb
+++ b/nbs/models.dlinear.ipynb
@@ -157,6 +157,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
" `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
@@ -197,6 +198,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -225,6 +227,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled = start_padding_enabled,\n",
+ " data_availability_threshold = data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.fedformer.ipynb b/nbs/models.fedformer.ipynb
index 40b4d015a..d5b1cf807 100644
--- a/nbs/models.fedformer.ipynb
+++ b/nbs/models.fedformer.ipynb
@@ -472,6 +472,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
" `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
@@ -515,6 +516,7 @@
" num_lr_decays: int = -1,\n",
" early_stop_patience_steps: int =-1,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
" valid_batch_size: Optional[int] = None,\n",
@@ -547,6 +549,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.informer.ipynb b/nbs/models.informer.ipynb
index ac9900c74..a8127e09e 100644
--- a/nbs/models.informer.ipynb
+++ b/nbs/models.informer.ipynb
@@ -292,6 +292,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
" `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
@@ -341,6 +342,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -369,6 +371,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size = inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.mlp.ipynb b/nbs/models.mlp.ipynb
index 83f8c0764..040541d5d 100644
--- a/nbs/models.mlp.ipynb
+++ b/nbs/models.mlp.ipynb
@@ -108,6 +108,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
@@ -147,6 +148,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = -1,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -177,6 +179,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nbeats.ipynb b/nbs/models.nbeats.ipynb
index 00fa3d0b9..3c885cb5a 100644
--- a/nbs/models.nbeats.ipynb
+++ b/nbs/models.nbeats.ipynb
@@ -264,6 +264,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n",
@@ -309,6 +310,7 @@
" windows_batch_size: int = 1024,\n",
" inference_windows_batch_size: int = -1,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str ='identity',\n",
" random_seed: int = 1,\n",
@@ -341,6 +343,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nbeatsx.ipynb b/nbs/models.nbeatsx.ipynb
index c70f072b0..26a923f37 100644
--- a/nbs/models.nbeatsx.ipynb
+++ b/nbs/models.nbeatsx.ipynb
@@ -408,6 +408,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random seed initialization for replicability.
\n",
@@ -459,6 +460,7 @@
" windows_batch_size: int = 1024,\n",
" inference_windows_batch_size: int = -1,\n",
" start_padding_enabled: bool = False,\n",
+ " data_availability_threshold: float = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = \"identity\",\n",
" random_seed: int = 1,\n",
@@ -495,6 +497,7 @@
" windows_batch_size = windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size = step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nhits.ipynb b/nbs/models.nhits.ipynb
index da17dc80b..ffee2a3e3 100644
--- a/nbs/models.nhits.ipynb
+++ b/nbs/models.nhits.ipynb
@@ -297,6 +297,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n",
@@ -348,6 +349,7 @@
" windows_batch_size: int = 1024,\n",
" inference_windows_batch_size: int = -1,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -378,6 +380,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nlinear.ipynb b/nbs/models.nlinear.ipynb
index 294d57ce8..9e95b0179 100644
--- a/nbs/models.nlinear.ipynb
+++ b/nbs/models.nlinear.ipynb
@@ -104,6 +104,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
" `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
@@ -143,6 +144,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -171,6 +173,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled = start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.patchtst.ipynb b/nbs/models.patchtst.ipynb
index 20e9f24b2..8c59472db 100644
--- a/nbs/models.patchtst.ipynb
+++ b/nbs/models.patchtst.ipynb
@@ -709,6 +709,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n",
@@ -766,6 +767,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size: int = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -794,6 +796,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.tft.ipynb b/nbs/models.tft.ipynb
index dad634bb2..526c1b2ed 100644
--- a/nbs/models.tft.ipynb
+++ b/nbs/models.tft.ipynb
@@ -667,6 +667,7 @@
" `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.
\n",
" `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `valid_batch_size`: int=None, number of different series in each validation and test batch.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
@@ -713,6 +714,7 @@
" windows_batch_size: int = 1024,\n",
" inference_windows_batch_size: int = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold: float = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'robust',\n",
" num_workers_loader = 0,\n",
@@ -743,6 +745,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.tide.ipynb b/nbs/models.tide.ipynb
index 31901835b..b70bd9173 100644
--- a/nbs/models.tide.ipynb
+++ b/nbs/models.tide.ipynb
@@ -207,6 +207,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -238,6 +239,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
diff --git a/nbs/models.timellm.ipynb b/nbs/models.timellm.ipynb
index 7dd92b95b..b73dbd043 100644
--- a/nbs/models.timellm.ipynb
+++ b/nbs/models.timellm.ipynb
@@ -331,6 +331,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n",
@@ -386,6 +387,7 @@
" windows_batch_size: int = 1024,\n",
" inference_windows_batch_size: int = 1024,\n",
" start_padding_enabled: bool = False,\n",
+ " data_availability_threshold: float = 0.0,\n",
" step_size: int = 1,\n",
" num_lr_decays: int = 0,\n",
" early_stop_patience_steps: int = -1,\n",
@@ -415,6 +417,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.timesnet.ipynb b/nbs/models.timesnet.ipynb
index 18645b4da..d6b81e588 100644
--- a/nbs/models.timesnet.ipynb
+++ b/nbs/models.timesnet.ipynb
@@ -249,6 +249,8 @@
" Number of windows to sample in each inference batch.\n",
" start_padding_enabled : bool (default=False)\n",
" If True, the model will pad the time series with zeros at the beginning by input size.\n",
+ " data_availability_threshold : float (default=0.0)\n",
+ " Fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" scaler_type : str (default='standard')\n",
" Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" random_seed : int (default=1)\n",
@@ -301,6 +303,7 @@
" windows_batch_size = 64,\n",
" inference_windows_batch_size = 256,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold: float = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'standard',\n",
" random_seed: int = 1,\n",
@@ -329,6 +332,7 @@
" valid_batch_size=valid_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled = start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.vanillatransformer.ipynb b/nbs/models.vanillatransformer.ipynb
index 34e4ac2b1..f85582e6a 100644
--- a/nbs/models.vanillatransformer.ipynb
+++ b/nbs/models.vanillatransformer.ipynb
@@ -190,6 +190,7 @@
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n",
" `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
+ " `data_availability_threshold`: float=0.0, fraction of available data points (e.g. 0.8 for 80%) that a window must exceed to be kept; windows at or below it are dropped.
\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
" `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
@@ -236,6 +237,7 @@
" windows_batch_size = 1024,\n",
" inference_windows_batch_size: int = 1024,\n",
" start_padding_enabled = False,\n",
+ " data_availability_threshold: float = 0.0,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -263,6 +265,7 @@
" windows_batch_size=windows_batch_size,\n",
" inference_windows_batch_size=inference_windows_batch_size,\n",
" start_padding_enabled=start_padding_enabled,\n",
+ " data_availability_threshold=data_availability_threshold,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
diff --git a/neuralforecast/common/_base_windows.py b/neuralforecast/common/_base_windows.py
index 416535c2e..aeda559d7 100644
--- a/neuralforecast/common/_base_windows.py
+++ b/neuralforecast/common/_base_windows.py
@@ -41,6 +41,7 @@ def __init__(
windows_batch_size,
inference_windows_batch_size,
start_padding_enabled,
+ data_availability_threshold=0.0,
step_size=1,
num_lr_decays=0,
early_stop_patience_steps=-1,
@@ -87,6 +88,7 @@ def __init__(
)
else:
self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)
+ self.data_availability_threshold = data_availability_threshold
# Batch sizes
self.batch_size = batch_size
@@ -164,15 +166,22 @@ def _create_windows(self, batch, step, w_idxs=None):
available_idx = temporal_cols.get_loc("available_mask")
available_condition = windows[:, : self.input_size, available_idx]
available_condition = torch.sum(available_condition, axis=1)
- final_condition = available_condition > 0
+ final_condition = (
+ available_condition > self.data_availability_threshold * self.input_size
+ )
if self.h > 0:
sample_condition = windows[:, self.input_size :, available_idx]
sample_condition = torch.sum(sample_condition, axis=1)
- final_condition = (sample_condition > 0) & (available_condition > 0)
+ final_condition = (
+ sample_condition > self.data_availability_threshold * self.h
+ ) & (
+ available_condition
+ > self.data_availability_threshold * self.input_size
+ )
windows = windows[final_condition]
# Parse Static data to match windows
- # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]
+ # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]
static = batch.get("static", None)
static_cols = batch.get("static_cols", None)
if static is not None:
diff --git a/neuralforecast/models/autoformer.py b/neuralforecast/models/autoformer.py
index 0dfad619c..898b577b4 100644
--- a/neuralforecast/models/autoformer.py
+++ b/neuralforecast/models/autoformer.py
@@ -468,6 +468,7 @@ class Autoformer(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -519,6 +520,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -549,6 +551,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/bitcn.py b/neuralforecast/models/bitcn.py
index 56396058e..a11bf67c3 100644
--- a/neuralforecast/models/bitcn.py
+++ b/neuralforecast/models/bitcn.py
@@ -102,6 +102,7 @@ class BiTCN(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -144,6 +145,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -173,6 +175,7 @@ def __init__(
valid_batch_size=valid_batch_size,
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
+ data_availability_threshold=data_availability_threshold,
start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
diff --git a/neuralforecast/models/deepar.py b/neuralforecast/models/deepar.py
index 522311633..980d40650 100644
--- a/neuralforecast/models/deepar.py
+++ b/neuralforecast/models/deepar.py
@@ -81,6 +81,7 @@ class DeepAR(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py
index 2caa4c008..4924ad282 100644
--- a/neuralforecast/models/deepnpts.py
+++ b/neuralforecast/models/deepnpts.py
@@ -43,6 +43,7 @@ class DeepNPTS(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -90,6 +91,7 @@ def __init__(
windows_batch_size: int = 1024,
inference_windows_batch_size: int = 1024,
start_padding_enabled=False,
+ data_availability_threshold: float = 0.0,
step_size: int = 1,
scaler_type: str = "standard",
random_seed: int = 1,
@@ -135,6 +137,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/dlinear.py b/neuralforecast/models/dlinear.py
index 213f8ff4b..bb80d921a 100644
--- a/neuralforecast/models/dlinear.py
+++ b/neuralforecast/models/dlinear.py
@@ -70,6 +70,7 @@ class DLinear(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -112,6 +113,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -142,6 +144,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/fedformer.py b/neuralforecast/models/fedformer.py
index c4d6710d9..fe9d5115e 100644
--- a/neuralforecast/models/fedformer.py
+++ b/neuralforecast/models/fedformer.py
@@ -463,6 +463,7 @@ class FEDformer(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -508,6 +509,7 @@ def __init__(
num_lr_decays: int = -1,
early_stop_patience_steps: int = -1,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
val_check_steps: int = 100,
batch_size: int = 32,
valid_batch_size: Optional[int] = None,
@@ -542,6 +544,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/informer.py b/neuralforecast/models/informer.py
index 2be88adbf..446cdcd30 100644
--- a/neuralforecast/models/informer.py
+++ b/neuralforecast/models/informer.py
@@ -209,6 +209,7 @@ class Informer(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -260,6 +261,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -290,6 +292,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/mlp.py b/neuralforecast/models/mlp.py
index 8ded36f7a..7929177b4 100644
--- a/neuralforecast/models/mlp.py
+++ b/neuralforecast/models/mlp.py
@@ -43,6 +43,7 @@ class MLP(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -84,6 +85,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=-1,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -116,6 +118,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nbeats.py b/neuralforecast/models/nbeats.py
index 5dfa5c7a2..4387730b1 100644
--- a/neuralforecast/models/nbeats.py
+++ b/neuralforecast/models/nbeats.py
@@ -222,6 +222,7 @@ class NBEATS(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -269,6 +270,7 @@ def __init__(
windows_batch_size: int = 1024,
inference_windows_batch_size: int = -1,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -303,6 +305,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nbeatsx.py b/neuralforecast/models/nbeatsx.py
index 2547f1d81..1dd06f941 100644
--- a/neuralforecast/models/nbeatsx.py
+++ b/neuralforecast/models/nbeatsx.py
@@ -303,6 +303,7 @@ class NBEATSx(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random seed initialization for replicability.
@@ -354,6 +355,7 @@ def __init__(
windows_batch_size: int = 1024,
inference_windows_batch_size: int = -1,
start_padding_enabled: bool = False,
+ data_availability_threshold: float = 0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -391,6 +393,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nhits.py b/neuralforecast/models/nhits.py
index ebe9e784d..19c20b94b 100644
--- a/neuralforecast/models/nhits.py
+++ b/neuralforecast/models/nhits.py
@@ -220,6 +220,7 @@ class NHITS(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -273,6 +274,7 @@ def __init__(
windows_batch_size: int = 1024,
inference_windows_batch_size: int = -1,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -305,6 +307,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nlinear.py b/neuralforecast/models/nlinear.py
index a44ca879c..555d88640 100644
--- a/neuralforecast/models/nlinear.py
+++ b/neuralforecast/models/nlinear.py
@@ -34,6 +34,7 @@ class NLinear(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -75,6 +76,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -105,6 +107,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/patchtst.py b/neuralforecast/models/patchtst.py
index af171b63e..b7084d92c 100644
--- a/neuralforecast/models/patchtst.py
+++ b/neuralforecast/models/patchtst.py
@@ -864,6 +864,7 @@ class PatchTST(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -923,6 +924,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size: int = 1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -953,6 +955,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/tft.py b/neuralforecast/models/tft.py
index 8d89322ee..ee7f0c2a3 100644
--- a/neuralforecast/models/tft.py
+++ b/neuralforecast/models/tft.py
@@ -406,6 +406,7 @@ class TFT(BaseWindows):
`windows_batch_size`: int=None, windows sampled from rolled data, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`valid_batch_size`: int=None, number of different series in each validation and test batch.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
@@ -454,6 +455,7 @@ def __init__(
windows_batch_size: int = 1024,
inference_windows_batch_size: int = 1024,
start_padding_enabled=False,
+ data_availability_threshold: float = 0.0,
step_size: int = 1,
scaler_type: str = "robust",
num_workers_loader=0,
@@ -485,6 +487,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/tide.py b/neuralforecast/models/tide.py
index d7df58373..507e380dc 100644
--- a/neuralforecast/models/tide.py
+++ b/neuralforecast/models/tide.py
@@ -122,6 +122,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size=1024,
start_padding_enabled=False,
+ data_availability_threshold=0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -154,6 +155,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
diff --git a/neuralforecast/models/timellm.py b/neuralforecast/models/timellm.py
index a14381c53..fcbe81557 100644
--- a/neuralforecast/models/timellm.py
+++ b/neuralforecast/models/timellm.py
@@ -260,6 +260,7 @@ class TimeLLM(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -316,6 +317,7 @@ def __init__(
windows_batch_size: int = 1024,
inference_windows_batch_size: int = 1024,
start_padding_enabled: bool = False,
+ data_availability_threshold: float = 0.0,
step_size: int = 1,
num_lr_decays: int = 0,
early_stop_patience_steps: int = -1,
@@ -347,6 +349,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/timesnet.py b/neuralforecast/models/timesnet.py
index 3e5a1f074..034358ad8 100644
--- a/neuralforecast/models/timesnet.py
+++ b/neuralforecast/models/timesnet.py
@@ -166,6 +166,8 @@ class TimesNet(BaseWindows):
Number of windows to sample in each inference batch.
start_padding_enabled : bool (default=False)
If True, the model will pad the time series with zeros at the beginning by input size.
+ data_availability_threshold : float (default=0.0)
+ Drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
scaler_type : str (default='standard')
Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
random_seed : int (default=1)
@@ -220,6 +222,7 @@ def __init__(
windows_batch_size=64,
inference_windows_batch_size=256,
start_padding_enabled=False,
+ data_availability_threshold: float = 0.0,
step_size: int = 1,
scaler_type: str = "standard",
random_seed: int = 1,
@@ -250,6 +253,7 @@ def __init__(
valid_batch_size=valid_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/vanillatransformer.py b/neuralforecast/models/vanillatransformer.py
index 49d374c69..011d841b6 100644
--- a/neuralforecast/models/vanillatransformer.py
+++ b/neuralforecast/models/vanillatransformer.py
@@ -108,6 +108,7 @@ class VanillaTransformer(BaseWindows):
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows where the fraction (0.0-1.0) of available data points is not greater than this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -156,6 +157,7 @@ def __init__(
windows_batch_size=1024,
inference_windows_batch_size: int = 1024,
start_padding_enabled=False,
+ data_availability_threshold: float = 0.0,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -185,6 +187,7 @@ def __init__(
windows_batch_size=windows_batch_size,
inference_windows_batch_size=inference_windows_batch_size,
start_padding_enabled=start_padding_enabled,
+ data_availability_threshold=data_availability_threshold,
step_size=step_size,
scaler_type=scaler_type,
num_workers_loader=num_workers_loader,