diff --git a/nbs/common.base_windows.ipynb b/nbs/common.base_windows.ipynb index 90635d391..7dc7a5546 100644 --- a/nbs/common.base_windows.ipynb +++ b/nbs/common.base_windows.ipynb @@ -103,6 +103,7 @@ " windows_batch_size,\n", " inference_windows_batch_size,\n", " start_padding_enabled,\n", + " data_availability_threshold=0.0,\n", " step_size=1,\n", " num_lr_decays=0,\n", " early_stop_patience_steps=-1,\n", @@ -146,6 +147,7 @@ " self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0)\n", " else:\n", " self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)\n", + " self.data_availability_threshold = data_availability_threshold\n", "\n", " # Batch sizes\n", " self.batch_size = batch_size\n", @@ -221,11 +223,11 @@ " available_idx = temporal_cols.get_loc('available_mask')\n", " available_condition = windows[:, :self.input_size, available_idx]\n", " available_condition = torch.sum(available_condition, axis=1)\n", - " final_condition = (available_condition > 0)\n", + " final_condition = (available_condition > self.data_availability_threshold * self.input_size)\n", " if self.h > 0:\n", " sample_condition = windows[:, self.input_size:, available_idx]\n", " sample_condition = torch.sum(sample_condition, axis=1)\n", - " final_condition = (sample_condition > 0) & (available_condition > 0)\n", + " final_condition = (sample_condition > self.data_availability_threshold * self.h) & (available_condition > self.data_availability_threshold * self.input_size)\n", " windows = windows[final_condition]\n", "\n", " # Parse Static data to match windows\n", @@ -880,7 +882,39 @@ "id": "bf493ff9", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "#| hide\n", + "# Test that data_availability_threshold filters windows with low data availability\n", + "\n", + "#mark every odd data point as bad quality \n", + "AirPassengersDF['available_mask'] = [1 if i % 2 == 0 else 0 for i in range(len(AirPassengersDF))]\n", + "dataset, indices, dates, ds = 
TimeSeriesDataset.from_df(df=AirPassengersDF)\n", + "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n", + "\n", + "train_loader = data.train_dataloader()\n", + "batch = next(iter(train_loader))\n", + "\n", + "basewindows = BaseWindows(h=12,\n", + " input_size=24,\n", + " hist_exog_list=['x', 'x2'],\n", + " futr_exog_list=['x'],\n", + " data_availability_threshold=0.8,\n", + " loss=MAE(),\n", + " valid_loss=MAE(),\n", + " learning_rate=0.001,\n", + " max_steps=1,\n", + " val_check_steps=0,\n", + " batch_size=1,\n", + " valid_batch_size=1,\n", + " windows_batch_size=10,\n", + " inference_windows_batch_size=2, \n", + " start_padding_enabled=False)\n", + "\n", + "try:\n", + " basewindows._create_windows(batch, step='train')\n", + "except Exception as e:\n", + " assert str(e) == \"No windows available for training\"" + ] } ], "metadata": { diff --git a/nbs/models.autoformer.ipynb b/nbs/models.autoformer.ipynb index 422a17ce2..64b956b48 100644 --- a/nbs/models.autoformer.ipynb +++ b/nbs/models.autoformer.ipynb @@ -483,6 +483,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", @@ -532,6 +533,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -560,6 +562,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " data_availability_threshold = data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.bitcn.ipynb b/nbs/models.bitcn.ipynb index 63582903a..580c3bd4d 100644 --- a/nbs/models.bitcn.ipynb +++ b/nbs/models.bitcn.ipynb @@ -166,6 +166,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -206,6 +207,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -234,6 +236,7 @@ " valid_batch_size=valid_batch_size,\n", " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", + " data_availability_threshold=data_availability_threshold,\n", " start_padding_enabled=start_padding_enabled,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", diff --git a/nbs/models.deepar.ipynb b/nbs/models.deepar.ipynb index 7b32b6ac1..92e4d76de 100644 --- a/nbs/models.deepar.ipynb +++ b/nbs/models.deepar.ipynb @@ -177,6 +177,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index 58b29d453..39c8870c5 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -122,6 +122,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -167,6 +168,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold: float = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'standard',\n", " random_seed: int = 1,\n", @@ -206,6 +208,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.dlinear.ipynb b/nbs/models.dlinear.ipynb index 744a1823f..f3308a4fc 100644 --- a/nbs/models.dlinear.ipynb +++ b/nbs/models.dlinear.ipynb @@ -157,6 +157,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", @@ -197,6 +198,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -225,6 +227,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " data_availability_threshold = data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.fedformer.ipynb b/nbs/models.fedformer.ipynb index 40b4d015a..d5b1cf807 100644 --- a/nbs/models.fedformer.ipynb +++ b/nbs/models.fedformer.ipynb @@ -472,6 +472,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", @@ -515,6 +516,7 @@ " num_lr_decays: int = -1,\n", " early_stop_patience_steps: int =-1,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " val_check_steps: int = 100,\n", " batch_size: int = 32,\n", " valid_batch_size: Optional[int] = None,\n", @@ -547,6 +549,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.informer.ipynb b/nbs/models.informer.ipynb index ac9900c74..a8127e09e 100644 --- a/nbs/models.informer.ipynb +++ b/nbs/models.informer.ipynb @@ -292,6 +292,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", @@ -341,6 +342,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -369,6 +371,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size = inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.mlp.ipynb b/nbs/models.mlp.ipynb index 83f8c0764..040541d5d 100644 --- a/nbs/models.mlp.ipynb +++ b/nbs/models.mlp.ipynb @@ -108,6 +108,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -147,6 +148,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = -1,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -177,6 +179,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.nbeats.ipynb b/nbs/models.nbeats.ipynb index 00fa3d0b9..3c885cb5a 100644 --- a/nbs/models.nbeats.ipynb +++ b/nbs/models.nbeats.ipynb @@ -264,6 +264,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -309,6 +310,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str ='identity',\n", " random_seed: int = 1,\n", @@ -341,6 +343,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.nbeatsx.ipynb b/nbs/models.nbeatsx.ipynb index c70f072b0..26a923f37 100644 --- a/nbs/models.nbeatsx.ipynb +++ b/nbs/models.nbeatsx.ipynb @@ -408,6 +408,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random seed initialization for replicability.
\n", @@ -459,6 +460,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled: bool = False,\n", + " data_availability_threshold: float = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = \"identity\",\n", " random_seed: int = 1,\n", @@ -495,6 +497,7 @@ " windows_batch_size = windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size = step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.nhits.ipynb b/nbs/models.nhits.ipynb index da17dc80b..ffee2a3e3 100644 --- a/nbs/models.nhits.ipynb +++ b/nbs/models.nhits.ipynb @@ -297,6 +297,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -348,6 +349,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -378,6 +380,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.nlinear.ipynb b/nbs/models.nlinear.ipynb index 294d57ce8..9e95b0179 100644 --- a/nbs/models.nlinear.ipynb +++ b/nbs/models.nlinear.ipynb @@ -104,6 +104,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", @@ -143,6 +144,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -171,6 +173,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.patchtst.ipynb b/nbs/models.patchtst.ipynb index 20e9f24b2..8c59472db 100644 --- a/nbs/models.patchtst.ipynb +++ b/nbs/models.patchtst.ipynb @@ -709,6 +709,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -766,6 +767,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -794,6 +796,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.tft.ipynb b/nbs/models.tft.ipynb index dad634bb2..526c1b2ed 100644 --- a/nbs/models.tft.ipynb +++ b/nbs/models.tft.ipynb @@ -667,6 +667,7 @@ " `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `valid_batch_size`: int=None, number of different series in each validation and test batch.
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", @@ -713,6 +714,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold: float = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'robust',\n", " num_workers_loader = 0,\n", @@ -743,6 +745,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.tide.ipynb b/nbs/models.tide.ipynb index 31901835b..b70bd9173 100644 --- a/nbs/models.tide.ipynb +++ b/nbs/models.tide.ipynb @@ -207,6 +207,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -238,6 +239,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.timellm.ipynb b/nbs/models.timellm.ipynb index 7dd92b95b..b73dbd043 100644 --- a/nbs/models.timellm.ipynb +++ b/nbs/models.timellm.ipynb @@ -331,6 +331,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -386,6 +387,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled: bool = False,\n", + " data_availability_threshold: float = 0.0,\n", " step_size: int = 1,\n", " num_lr_decays: int = 0,\n", " early_stop_patience_steps: int = -1,\n", @@ -415,6 +417,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.timesnet.ipynb b/nbs/models.timesnet.ipynb index 18645b4da..d6b81e588 100644 --- a/nbs/models.timesnet.ipynb +++ b/nbs/models.timesnet.ipynb @@ -249,6 +249,8 @@ " Number of windows to sample in each inference batch.\n", " start_padding_enabled : bool (default=False)\n", " If True, the model will pad the time series with zeros at the beginning by input size.\n", + " `data_availability_threshold`: float (default=0.0) \n", + " Drop windows where the percentage of available data points is less than this threshold.
\n", " scaler_type : str (default='standard')\n", " Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " random_seed : int (default=1)\n", @@ -301,6 +303,7 @@ " windows_batch_size = 64,\n", " inference_windows_batch_size = 256,\n", " start_padding_enabled = False,\n", + " data_availability_threshold: float = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'standard',\n", " random_seed: int = 1,\n", @@ -329,6 +332,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/nbs/models.vanillatransformer.ipynb b/nbs/models.vanillatransformer.ipynb index 34e4ac2b1..f85582e6a 100644 --- a/nbs/models.vanillatransformer.ipynb +++ b/nbs/models.vanillatransformer.ipynb @@ -190,6 +190,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", @@ -236,6 +237,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " data_availability_threshold: float = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -263,6 +265,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " data_availability_threshold=data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " num_workers_loader=num_workers_loader,\n", diff --git a/neuralforecast/common/_base_windows.py b/neuralforecast/common/_base_windows.py index 416535c2e..aeda559d7 100644 --- a/neuralforecast/common/_base_windows.py +++ b/neuralforecast/common/_base_windows.py @@ -41,6 +41,7 @@ def __init__( windows_batch_size, inference_windows_batch_size, start_padding_enabled, + data_availability_threshold=0.0, step_size=1, num_lr_decays=0, early_stop_patience_steps=-1, @@ -87,6 +88,7 @@ def __init__( ) else: self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0) + self.data_availability_threshold = data_availability_threshold # Batch sizes self.batch_size = batch_size @@ -164,15 +166,22 @@ def _create_windows(self, batch, step, w_idxs=None): available_idx = temporal_cols.get_loc("available_mask") available_condition = windows[:, : self.input_size, available_idx] available_condition = torch.sum(available_condition, axis=1) - final_condition = available_condition > 0 + final_condition = ( + available_condition > self.data_availability_threshold * self.input_size + ) if self.h > 0: sample_condition = windows[:, self.input_size :, available_idx] sample_condition = torch.sum(sample_condition, axis=1) - final_condition = (sample_condition > 0) & (available_condition > 0) + final_condition = ( + sample_condition > self.data_availability_threshold * self.h + ) & ( + available_condition + > 
self.data_availability_threshold * self.input_size + ) windows = windows[final_condition] # Parse Static data to match windows - # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in] + # [B, S_in] -> [B, Ws, S_in] -> self.data_availability_threshold * self.h) & (available_condition > self.data_availability_threshold * self.input_size[B*Ws, S_in] static = batch.get("static", None) static_cols = batch.get("static_cols", None) if static is not None: diff --git a/neuralforecast/models/autoformer.py b/neuralforecast/models/autoformer.py index 0dfad619c..898b577b4 100644 --- a/neuralforecast/models/autoformer.py +++ b/neuralforecast/models/autoformer.py @@ -468,6 +468,7 @@ class Autoformer(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points is not greater than this threshold (a fraction between 0 and 1, not a percent).
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -519,6 +520,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -549,6 +551,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/bitcn.py b/neuralforecast/models/bitcn.py index 56396058e..a11bf67c3 100644 --- a/neuralforecast/models/bitcn.py +++ b/neuralforecast/models/bitcn.py @@ -102,6 +102,7 @@ class BiTCN(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -144,6 +145,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -173,6 +175,7 @@ def __init__( valid_batch_size=valid_batch_size, windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, + data_availability_threshold=data_availability_threshold, start_padding_enabled=start_padding_enabled, step_size=step_size, scaler_type=scaler_type, diff --git a/neuralforecast/models/deepar.py b/neuralforecast/models/deepar.py index 522311633..980d40650 100644 --- a/neuralforecast/models/deepar.py +++ b/neuralforecast/models/deepar.py @@ -81,6 +81,7 @@ class DeepAR(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py index 2caa4c008..4924ad282 100644 --- a/neuralforecast/models/deepnpts.py +++ b/neuralforecast/models/deepnpts.py @@ -43,6 +43,7 @@ class DeepNPTS(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -90,6 +91,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + data_availability_threshold: float = 0.0, step_size: int = 1, scaler_type: str = "standard", random_seed: int = 1, @@ -135,6 +137,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/dlinear.py b/neuralforecast/models/dlinear.py index 213f8ff4b..bb80d921a 100644 --- a/neuralforecast/models/dlinear.py +++ b/neuralforecast/models/dlinear.py @@ -70,6 +70,7 @@ class DLinear(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -112,6 +113,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -142,6 +144,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/fedformer.py b/neuralforecast/models/fedformer.py index c4d6710d9..fe9d5115e 100644 --- a/neuralforecast/models/fedformer.py +++ b/neuralforecast/models/fedformer.py @@ -463,6 +463,7 @@ class FEDformer(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -508,6 +509,7 @@ def __init__( num_lr_decays: int = -1, early_stop_patience_steps: int = -1, start_padding_enabled=False, + data_availability_threshold=0.0, val_check_steps: int = 100, batch_size: int = 32, valid_batch_size: Optional[int] = None, @@ -542,6 +544,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/informer.py b/neuralforecast/models/informer.py index 2be88adbf..446cdcd30 100644 --- a/neuralforecast/models/informer.py +++ b/neuralforecast/models/informer.py @@ -209,6 +209,7 @@ class Informer(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -260,6 +261,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -290,6 +292,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/mlp.py b/neuralforecast/models/mlp.py index 8ded36f7a..7929177b4 100644 --- a/neuralforecast/models/mlp.py +++ b/neuralforecast/models/mlp.py @@ -43,6 +43,7 @@ class MLP(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -84,6 +85,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=-1, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -116,6 +118,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/nbeats.py b/neuralforecast/models/nbeats.py index 5dfa5c7a2..4387730b1 100644 --- a/neuralforecast/models/nbeats.py +++ b/neuralforecast/models/nbeats.py @@ -222,6 +222,7 @@ class NBEATS(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -269,6 +270,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -303,6 +305,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/nbeatsx.py b/neuralforecast/models/nbeatsx.py index 2547f1d81..1dd06f941 100644 --- a/neuralforecast/models/nbeatsx.py +++ b/neuralforecast/models/nbeatsx.py @@ -303,6 +303,7 @@ class NBEATSx(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random seed initialization for replicability.
@@ -354,6 +355,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled: bool = False, + data_availability_threshold: float = 0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -391,6 +393,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/nhits.py b/neuralforecast/models/nhits.py index ebe9e784d..19c20b94b 100644 --- a/neuralforecast/models/nhits.py +++ b/neuralforecast/models/nhits.py @@ -220,6 +220,7 @@ class NHITS(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -273,6 +274,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -305,6 +307,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/nlinear.py b/neuralforecast/models/nlinear.py index a44ca879c..555d88640 100644 --- a/neuralforecast/models/nlinear.py +++ b/neuralforecast/models/nlinear.py @@ -34,6 +34,7 @@ class NLinear(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -75,6 +76,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -105,6 +107,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/patchtst.py b/neuralforecast/models/patchtst.py index af171b63e..b7084d92c 100644 --- a/neuralforecast/models/patchtst.py +++ b/neuralforecast/models/patchtst.py @@ -864,6 +864,7 @@ class PatchTST(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -923,6 +924,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -953,6 +955,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/tft.py b/neuralforecast/models/tft.py index 8d89322ee..ee7f0c2a3 100644 --- a/neuralforecast/models/tft.py +++ b/neuralforecast/models/tft.py @@ -406,6 +406,7 @@ class TFT(BaseWindows): `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`valid_batch_size`: int=None, number of different series in each validation and test batch.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
@@ -454,6 +455,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + data_availability_threshold: float = 0.0, step_size: int = 1, scaler_type: str = "robust", num_workers_loader=0, @@ -485,6 +487,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/tide.py b/neuralforecast/models/tide.py index d7df58373..507e380dc 100644 --- a/neuralforecast/models/tide.py +++ b/neuralforecast/models/tide.py @@ -122,6 +122,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -154,6 +155,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/timellm.py b/neuralforecast/models/timellm.py index a14381c53..fcbe81557 100644 --- a/neuralforecast/models/timellm.py +++ b/neuralforecast/models/timellm.py @@ -260,6 +260,7 @@ class TimeLLM(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -316,6 +317,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = 1024, start_padding_enabled: bool = False, + data_availability_threshold: float = 0.0, step_size: int = 1, num_lr_decays: int = 0, early_stop_patience_steps: int = -1, @@ -347,6 +349,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/timesnet.py b/neuralforecast/models/timesnet.py index 3e5a1f074..034358ad8 100644 --- a/neuralforecast/models/timesnet.py +++ b/neuralforecast/models/timesnet.py @@ -166,6 +166,8 @@ class TimesNet(BaseWindows): Number of windows to sample in each inference batch. start_padding_enabled : bool (default=False) If True, the model will pad the time series with zeros at the beginning by input size. + data_availability_threshold : float (default=0.0) + Drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
scaler_type : str (default='standard') Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
random_seed : int (default=1) @@ -220,6 +222,7 @@ def __init__( windows_batch_size=64, inference_windows_batch_size=256, start_padding_enabled=False, + data_availability_threshold: float = 0.0, step_size: int = 1, scaler_type: str = "standard", random_seed: int = 1, @@ -250,6 +253,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader, diff --git a/neuralforecast/models/vanillatransformer.py b/neuralforecast/models/vanillatransformer.py index 49d374c69..011d841b6 100644 --- a/neuralforecast/models/vanillatransformer.py +++ b/neuralforecast/models/vanillatransformer.py @@ -108,6 +108,7 @@ class VanillaTransformer(BaseWindows): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `data_availability_threshold`: float=0.0, drop windows whose fraction of available data points, in either the input or the forecast horizon, is not above this threshold.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
@@ -156,6 +157,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + data_availability_threshold: float = 0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -185,6 +187,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + data_availability_threshold=data_availability_threshold, step_size=step_size, scaler_type=scaler_type, num_workers_loader=num_workers_loader,