diff --git a/nbs/common.base_windows.ipynb b/nbs/common.base_windows.ipynb
index 90635d391..7dc7a5546 100644
--- a/nbs/common.base_windows.ipynb
+++ b/nbs/common.base_windows.ipynb
@@ -103,6 +103,7 @@
     "                 windows_batch_size,\n",
     "                 inference_windows_batch_size,\n",
     "                 start_padding_enabled,\n",
+    "                 data_availability_threshold=0.0,\n",
     "                 step_size=1,\n",
     "                 num_lr_decays=0,\n",
     "                 early_stop_patience_steps=-1,\n",
@@ -146,6 +147,7 @@
     "            self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0)\n",
     "        else:\n",
     "            self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)\n",
+    "        self.data_availability_threshold = data_availability_threshold\n",
     "\n",
     "        # Batch sizes\n",
     "        self.batch_size = batch_size\n",
@@ -221,11 +223,11 @@
     "            available_idx = temporal_cols.get_loc('available_mask')\n",
     "            available_condition = windows[:, :self.input_size, available_idx]\n",
     "            available_condition = torch.sum(available_condition, axis=1)\n",
-    "            final_condition = (available_condition > 0)\n",
+    "            final_condition = (available_condition > self.data_availability_threshold * self.input_size)\n",
     "            if self.h > 0:\n",
     "                sample_condition = windows[:, self.input_size:, available_idx]\n",
     "                sample_condition = torch.sum(sample_condition, axis=1)\n",
-    "                final_condition = (sample_condition > 0) & (available_condition > 0)\n",
+    "                final_condition = (sample_condition > self.data_availability_threshold * self.h) & (available_condition > self.data_availability_threshold * self.input_size)\n",
     "            windows = windows[final_condition]\n",
     "\n",
     "            # Parse Static data to match windows\n",
@@ -880,7 +882,42 @@
    "id": "bf493ff9",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "#| hide\n",
+    "# Test that data_availability_threshold filters windows with low data availability\n",
+    "\n",
+    "# Mark every odd-indexed data point as unavailable\n",
+    "AirPassengersDF['available_mask'] = [1 if i % 2 == 0 else 0 for i in range(len(AirPassengersDF))]\n",
+    "dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=AirPassengersDF)\n",
+    "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n",
+    "\n",
+    "train_loader =  data.train_dataloader()\n",
+    "batch = next(iter(train_loader))\n",
+    "\n",
+    "basewindows = BaseWindows(h=12,\n",
+    "                            input_size=24,\n",
+    "                            hist_exog_list=['x', 'x2'],\n",
+    "                            futr_exog_list=['x'],\n",
+    "                            data_availability_threshold=0.8,\n",
+    "                            loss=MAE(),\n",
+    "                            valid_loss=MAE(),\n",
+    "                            learning_rate=0.001,\n",
+    "                            max_steps=1,\n",
+    "                            val_check_steps=0,\n",
+    "                            batch_size=1,\n",
+    "                            valid_batch_size=1,\n",
+    "                            windows_batch_size=10,\n",
+    "                            inference_windows_batch_size=2, \n",
+    "                            start_padding_enabled=False)\n",
+    "\n",
+    "# At most half of the points in any window are available, which is below the 0.8 threshold, so every window should be filtered out\n",
+    "try:\n",
+    "    basewindows._create_windows(batch, step='train')\n",
+    "except Exception as e:\n",
+    "    assert str(e) == \"No windows available for training\"\n",
+    "else:\n",
+    "    raise AssertionError('Expected _create_windows to fail because no window meets the availability threshold')"
+   ]
   }
  ],
  "metadata": {
diff --git a/nbs/models.autoformer.ipynb b/nbs/models.autoformer.ipynb
index 422a17ce2..64b956b48 100644
--- a/nbs/models.autoformer.ipynb
+++ b/nbs/models.autoformer.ipynb
@@ -483,6 +483,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
     "    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>\n",
@@ -532,6 +533,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -560,6 +562,7 @@
     "                                       valid_batch_size=valid_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled = start_padding_enabled,\n",
+    "                                       data_availability_threshold = data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.bitcn.ipynb b/nbs/models.bitcn.ipynb
index 63582903a..580c3bd4d 100644
--- a/nbs/models.bitcn.ipynb
+++ b/nbs/models.bitcn.ipynb
@@ -166,6 +166,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -206,6 +207,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -234,6 +236,7 @@
     "            valid_batch_size=valid_batch_size,\n",
     "            windows_batch_size=windows_batch_size,\n",
     "            inference_windows_batch_size=inference_windows_batch_size,\n",
+    "            data_availability_threshold=data_availability_threshold,\n",
     "            start_padding_enabled=start_padding_enabled,\n",
     "            step_size=step_size,\n",
     "            scaler_type=scaler_type,\n",
diff --git a/nbs/models.deepar.ipynb b/nbs/models.deepar.ipynb
index 7b32b6ac1..92e4d76de 100644
--- a/nbs/models.deepar.ipynb
+++ b/nbs/models.deepar.ipynb
@@ -177,6 +177,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb
index 58b29d453..39c8870c5 100644
--- a/nbs/models.deepnpts.ipynb
+++ b/nbs/models.deepnpts.ipynb
@@ -122,6 +122,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -167,6 +168,7 @@
     "                 windows_batch_size: int = 1024,\n",
     "                 inference_windows_batch_size: int = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold: float = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'standard',\n",
     "                 random_seed: int = 1,\n",
@@ -206,6 +208,7 @@
     "                                    valid_batch_size=valid_batch_size,\n",
     "                                    inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                    start_padding_enabled=start_padding_enabled,\n",
+    "                                    data_availability_threshold=data_availability_threshold,\n",
     "                                    step_size=step_size,\n",
     "                                    scaler_type=scaler_type,\n",
     "                                    num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.dlinear.ipynb b/nbs/models.dlinear.ipynb
index 744a1823f..f3308a4fc 100644
--- a/nbs/models.dlinear.ipynb
+++ b/nbs/models.dlinear.ipynb
@@ -157,6 +157,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
     "    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>\n",
@@ -197,6 +198,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -225,6 +227,7 @@
     "                                       valid_batch_size=valid_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled = start_padding_enabled,\n",
+    "                                       data_availability_threshold = data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.fedformer.ipynb b/nbs/models.fedformer.ipynb
index 40b4d015a..d5b1cf807 100644
--- a/nbs/models.fedformer.ipynb
+++ b/nbs/models.fedformer.ipynb
@@ -472,6 +472,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
     "    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>\n",
@@ -515,6 +516,7 @@
     "                 num_lr_decays: int = -1,\n",
     "                 early_stop_patience_steps: int =-1,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 val_check_steps: int = 100,\n",
     "                 batch_size: int = 32,\n",
     "                 valid_batch_size: Optional[int] = None,\n",
@@ -547,6 +549,7 @@
     "                                       valid_batch_size=valid_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled=start_padding_enabled,\n",
+    "                                       data_availability_threshold=data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.informer.ipynb b/nbs/models.informer.ipynb
index ac9900c74..a8127e09e 100644
--- a/nbs/models.informer.ipynb
+++ b/nbs/models.informer.ipynb
@@ -292,6 +292,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
     "    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>\n",
@@ -341,6 +342,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -369,6 +371,7 @@
     "                                       windows_batch_size=windows_batch_size,\n",
     "                                       inference_windows_batch_size = inference_windows_batch_size,\n",
     "                                       start_padding_enabled=start_padding_enabled,\n",
+    "                                       data_availability_threshold=data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.mlp.ipynb b/nbs/models.mlp.ipynb
index 83f8c0764..040541d5d 100644
--- a/nbs/models.mlp.ipynb
+++ b/nbs/models.mlp.ipynb
@@ -108,6 +108,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -147,6 +148,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = -1,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -177,6 +179,7 @@
     "                                  windows_batch_size=windows_batch_size,\n",
     "                                  inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                  start_padding_enabled=start_padding_enabled,\n",
+    "                                  data_availability_threshold=data_availability_threshold,\n",
     "                                  step_size=step_size,\n",
     "                                  scaler_type=scaler_type,\n",
     "                                  num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nbeats.ipynb b/nbs/models.nbeats.ipynb
index 00fa3d0b9..3c885cb5a 100644
--- a/nbs/models.nbeats.ipynb
+++ b/nbs/models.nbeats.ipynb
@@ -264,6 +264,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -309,6 +310,7 @@
     "                 windows_batch_size: int = 1024,\n",
     "                 inference_windows_batch_size: int = -1,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str ='identity',\n",
     "                 random_seed: int = 1,\n",
@@ -341,6 +343,7 @@
     "                                     valid_batch_size=valid_batch_size,\n",
     "                                     inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                     start_padding_enabled=start_padding_enabled,\n",
+    "                                     data_availability_threshold=data_availability_threshold,\n",
     "                                     step_size=step_size,\n",
     "                                     scaler_type=scaler_type,\n",
     "                                     num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nbeatsx.ipynb b/nbs/models.nbeatsx.ipynb
index c70f072b0..26a923f37 100644
--- a/nbs/models.nbeatsx.ipynb
+++ b/nbs/models.nbeatsx.ipynb
@@ -408,6 +408,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random seed initialization for replicability.<br>\n",
@@ -459,6 +460,7 @@
     "        windows_batch_size: int = 1024,\n",
     "        inference_windows_batch_size: int = -1,\n",
     "        start_padding_enabled: bool = False,\n",
+    "        data_availability_threshold: float = 0.0,\n",
     "        step_size: int = 1,\n",
     "        scaler_type: str = \"identity\",\n",
     "        random_seed: int = 1,\n",
@@ -495,6 +497,7 @@
     "                                      windows_batch_size = windows_batch_size,\n",
     "                                      inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                      start_padding_enabled=start_padding_enabled,\n",
+    "                                      data_availability_threshold=data_availability_threshold,\n",
     "                                      step_size = step_size,\n",
     "                                      scaler_type=scaler_type,\n",
     "                                      num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nhits.ipynb b/nbs/models.nhits.ipynb
index da17dc80b..ffee2a3e3 100644
--- a/nbs/models.nhits.ipynb
+++ b/nbs/models.nhits.ipynb
@@ -297,6 +297,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -348,6 +349,7 @@
     "                 windows_batch_size: int = 1024,\n",
     "                 inference_windows_batch_size: int = -1,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -378,6 +380,7 @@
     "                                    valid_batch_size=valid_batch_size,\n",
     "                                    inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                    start_padding_enabled=start_padding_enabled,\n",
+    "                                    data_availability_threshold=data_availability_threshold,\n",
     "                                    step_size=step_size,\n",
     "                                    scaler_type=scaler_type,\n",
     "                                    num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.nlinear.ipynb b/nbs/models.nlinear.ipynb
index 294d57ce8..9e95b0179 100644
--- a/nbs/models.nlinear.ipynb
+++ b/nbs/models.nlinear.ipynb
@@ -104,6 +104,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
     "    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>\n",
@@ -143,6 +144,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -171,6 +173,7 @@
     "                                       valid_batch_size=valid_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled = start_padding_enabled,\n",
+    "                                       data_availability_threshold=data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.patchtst.ipynb b/nbs/models.patchtst.ipynb
index 20e9f24b2..8c59472db 100644
--- a/nbs/models.patchtst.ipynb
+++ b/nbs/models.patchtst.ipynb
@@ -709,6 +709,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -766,6 +767,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size: int = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -794,6 +796,7 @@
     "                                       windows_batch_size=windows_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled=start_padding_enabled,\n",
+    "                                       data_availability_threshold=data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.tft.ipynb b/nbs/models.tft.ipynb
index dad634bb2..526c1b2ed 100644
--- a/nbs/models.tft.ipynb
+++ b/nbs/models.tft.ipynb
@@ -667,6 +667,7 @@
     "    `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction (0 to 1) of available data points does not exceed this threshold.<br>\n",
     "    `valid_batch_size`: int=None, number of different series in each validation and test batch.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
@@ -713,6 +714,7 @@
     "                 windows_batch_size: int = 1024,\n",
     "                 inference_windows_batch_size: int = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold: float = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'robust',\n",
     "                 num_workers_loader = 0,\n",
@@ -743,6 +745,7 @@
     "                                  windows_batch_size=windows_batch_size,\n",
     "                                  inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                  start_padding_enabled=start_padding_enabled,\n",
+    "                                  data_availability_threshold=data_availability_threshold,\n",
     "                                  step_size=step_size,\n",
     "                                  scaler_type=scaler_type,\n",
     "                                  num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.tide.ipynb b/nbs/models.tide.ipynb
index 31901835b..b70bd9173 100644
--- a/nbs/models.tide.ipynb
+++ b/nbs/models.tide.ipynb
@@ -207,6 +207,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -238,6 +239,7 @@
     "            windows_batch_size=windows_batch_size,\n",
     "            inference_windows_batch_size=inference_windows_batch_size,\n",
     "            start_padding_enabled=start_padding_enabled,\n",
+    "            data_availability_threshold=data_availability_threshold,\n",
     "            step_size=step_size,\n",
     "            scaler_type=scaler_type,\n",
     "            random_seed=random_seed,\n",
diff --git a/nbs/models.timellm.ipynb b/nbs/models.timellm.ipynb
index 7dd92b95b..b73dbd043 100644
--- a/nbs/models.timellm.ipynb
+++ b/nbs/models.timellm.ipynb
@@ -331,6 +331,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>\n",
     "    `step_size`: int=1, step size between each window of temporal data.<br>\n",
     "    `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -386,6 +387,7 @@
     "                 windows_batch_size: int = 1024,\n",
     "                 inference_windows_batch_size: int = 1024,\n",
     "                 start_padding_enabled: bool = False,\n",
+    "                 data_availability_threshold: float = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 num_lr_decays: int = 0,\n",
     "                 early_stop_patience_steps: int = -1,\n",
@@ -415,6 +417,7 @@
     "                                      windows_batch_size=windows_batch_size,\n",
     "                                      inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                      start_padding_enabled=start_padding_enabled,\n",
+    "                                      data_availability_threshold=data_availability_threshold,\n",
     "                                      step_size=step_size,\n",
     "                                      scaler_type=scaler_type,\n",
     "                                      num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.timesnet.ipynb b/nbs/models.timesnet.ipynb
index 18645b4da..d6b81e588 100644
--- a/nbs/models.timesnet.ipynb
+++ b/nbs/models.timesnet.ipynb
@@ -249,6 +249,8 @@
     "        Number of windows to sample in each inference batch.\n",
     "    start_padding_enabled : bool (default=False)\n",
     "        If True, the model will pad the time series with zeros at the beginning by input size.\n",
+    "    data_availability_threshold : float (default=0.0)\n",
+    "        Drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>\n",
     "    scaler_type : str (default='standard')\n",
     "        Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    random_seed : int (default=1)\n",
@@ -301,6 +303,7 @@
     "                 windows_batch_size = 64,\n",
     "                 inference_windows_batch_size = 256,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold: float = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'standard',\n",
     "                 random_seed: int = 1,\n",
@@ -329,6 +332,7 @@
     "                                       valid_batch_size=valid_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled = start_padding_enabled,\n",
+    "                                       data_availability_threshold=data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/nbs/models.vanillatransformer.ipynb b/nbs/models.vanillatransformer.ipynb
index 34e4ac2b1..f85582e6a 100644
--- a/nbs/models.vanillatransformer.ipynb
+++ b/nbs/models.vanillatransformer.ipynb
@@ -190,6 +190,7 @@
     "    `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
     "    `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>\n",
     "    `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
+    "    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>\n",
     "    `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
     "    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
     "    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>\n",
@@ -236,6 +237,7 @@
     "                 windows_batch_size = 1024,\n",
     "                 inference_windows_batch_size: int = 1024,\n",
     "                 start_padding_enabled = False,\n",
+    "                 data_availability_threshold: float = 0.0,\n",
     "                 step_size: int = 1,\n",
     "                 scaler_type: str = 'identity',\n",
     "                 random_seed: int = 1,\n",
@@ -263,6 +265,7 @@
     "                                       windows_batch_size=windows_batch_size,\n",
     "                                       inference_windows_batch_size=inference_windows_batch_size,\n",
     "                                       start_padding_enabled=start_padding_enabled,\n",
+    "                                       data_availability_threshold=data_availability_threshold,\n",
     "                                       step_size=step_size,\n",
     "                                       scaler_type=scaler_type,\n",
     "                                       num_workers_loader=num_workers_loader,\n",
diff --git a/neuralforecast/common/_base_windows.py b/neuralforecast/common/_base_windows.py
index 416535c2e..aeda559d7 100644
--- a/neuralforecast/common/_base_windows.py
+++ b/neuralforecast/common/_base_windows.py
@@ -41,6 +41,7 @@ def __init__(
         windows_batch_size,
         inference_windows_batch_size,
         start_padding_enabled,
+        data_availability_threshold=0.0,
         step_size=1,
         num_lr_decays=0,
         early_stop_patience_steps=-1,
@@ -87,6 +88,7 @@ def __init__(
             )
         else:
             self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)
+        self.data_availability_threshold = data_availability_threshold
 
         # Batch sizes
         self.batch_size = batch_size
@@ -164,15 +166,22 @@ def _create_windows(self, batch, step, w_idxs=None):
             available_idx = temporal_cols.get_loc("available_mask")
             available_condition = windows[:, : self.input_size, available_idx]
             available_condition = torch.sum(available_condition, axis=1)
-            final_condition = available_condition > 0
+            final_condition = (
+                available_condition > self.data_availability_threshold * self.input_size
+            )
             if self.h > 0:
                 sample_condition = windows[:, self.input_size :, available_idx]
                 sample_condition = torch.sum(sample_condition, axis=1)
-                final_condition = (sample_condition > 0) & (available_condition > 0)
+                final_condition = (
+                    sample_condition > self.data_availability_threshold * self.h
+                ) & (
+                    available_condition
+                    > self.data_availability_threshold * self.input_size
+                )
             windows = windows[final_condition]
 
             # Parse Static data to match windows
             # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]
             static = batch.get("static", None)
             static_cols = batch.get("static_cols", None)
             if static is not None:
diff --git a/neuralforecast/models/autoformer.py b/neuralforecast/models/autoformer.py
index 0dfad619c..898b577b4 100644
--- a/neuralforecast/models/autoformer.py
+++ b/neuralforecast/models/autoformer.py
@@ -468,6 +468,7 @@ class Autoformer(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
     `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
@@ -519,6 +520,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -549,6 +551,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/bitcn.py b/neuralforecast/models/bitcn.py
index 56396058e..a11bf67c3 100644
--- a/neuralforecast/models/bitcn.py
+++ b/neuralforecast/models/bitcn.py
@@ -102,6 +102,7 @@ class BiTCN(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
@@ -144,6 +145,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -173,6 +175,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
+            data_availability_threshold=data_availability_threshold,
             start_padding_enabled=start_padding_enabled,
             step_size=step_size,
             scaler_type=scaler_type,
diff --git a/neuralforecast/models/deepar.py b/neuralforecast/models/deepar.py
index 522311633..980d40650 100644
--- a/neuralforecast/models/deepar.py
+++ b/neuralforecast/models/deepar.py
@@ -81,6 +81,7 @@ class DeepAR(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>
diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py
index 2caa4c008..4924ad282 100644
--- a/neuralforecast/models/deepnpts.py
+++ b/neuralforecast/models/deepnpts.py
@@ -43,6 +43,7 @@ class DeepNPTS(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>
@@ -90,6 +91,7 @@ def __init__(
         windows_batch_size: int = 1024,
         inference_windows_batch_size: int = 1024,
         start_padding_enabled=False,
+        data_availability_threshold: float = 0.0,
         step_size: int = 1,
         scaler_type: str = "standard",
         random_seed: int = 1,
@@ -135,6 +137,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/dlinear.py b/neuralforecast/models/dlinear.py
index 213f8ff4b..bb80d921a 100644
--- a/neuralforecast/models/dlinear.py
+++ b/neuralforecast/models/dlinear.py
@@ -70,6 +70,7 @@ class DLinear(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
     `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
@@ -112,6 +113,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -142,6 +144,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/fedformer.py b/neuralforecast/models/fedformer.py
index c4d6710d9..fe9d5115e 100644
--- a/neuralforecast/models/fedformer.py
+++ b/neuralforecast/models/fedformer.py
@@ -463,6 +463,7 @@ class FEDformer(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
     `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
@@ -508,6 +509,7 @@ def __init__(
         num_lr_decays: int = -1,
         early_stop_patience_steps: int = -1,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         val_check_steps: int = 100,
         batch_size: int = 32,
         valid_batch_size: Optional[int] = None,
@@ -542,6 +544,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/informer.py b/neuralforecast/models/informer.py
index 2be88adbf..446cdcd30 100644
--- a/neuralforecast/models/informer.py
+++ b/neuralforecast/models/informer.py
@@ -209,6 +209,7 @@ class Informer(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
     `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
@@ -260,6 +261,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -290,6 +292,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/mlp.py b/neuralforecast/models/mlp.py
index 8ded36f7a..7929177b4 100644
--- a/neuralforecast/models/mlp.py
+++ b/neuralforecast/models/mlp.py
@@ -43,6 +43,7 @@ class MLP(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
@@ -84,6 +85,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=-1,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -116,6 +118,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nbeats.py b/neuralforecast/models/nbeats.py
index 5dfa5c7a2..4387730b1 100644
--- a/neuralforecast/models/nbeats.py
+++ b/neuralforecast/models/nbeats.py
@@ -222,6 +222,7 @@ class NBEATS(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>
@@ -269,6 +270,7 @@ def __init__(
         windows_batch_size: int = 1024,
         inference_windows_batch_size: int = -1,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -303,6 +305,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nbeatsx.py b/neuralforecast/models/nbeatsx.py
index 2547f1d81..1dd06f941 100644
--- a/neuralforecast/models/nbeatsx.py
+++ b/neuralforecast/models/nbeatsx.py
@@ -303,6 +303,7 @@ class NBEATSx(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random seed initialization for replicability.<br>
@@ -354,6 +355,7 @@ def __init__(
         windows_batch_size: int = 1024,
         inference_windows_batch_size: int = -1,
         start_padding_enabled: bool = False,
+        data_availability_threshold: float = 0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -391,6 +393,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nhits.py b/neuralforecast/models/nhits.py
index ebe9e784d..19c20b94b 100644
--- a/neuralforecast/models/nhits.py
+++ b/neuralforecast/models/nhits.py
@@ -220,6 +220,7 @@ class NHITS(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>
@@ -273,6 +274,7 @@ def __init__(
         windows_batch_size: int = 1024,
         inference_windows_batch_size: int = -1,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -305,6 +307,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/nlinear.py b/neuralforecast/models/nlinear.py
index a44ca879c..555d88640 100644
--- a/neuralforecast/models/nlinear.py
+++ b/neuralforecast/models/nlinear.py
@@ -34,6 +34,7 @@ class NLinear(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
     `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
@@ -75,6 +76,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -105,6 +107,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/patchtst.py b/neuralforecast/models/patchtst.py
index af171b63e..b7084d92c 100644
--- a/neuralforecast/models/patchtst.py
+++ b/neuralforecast/models/patchtst.py
@@ -864,6 +864,7 @@ class PatchTST(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, drop windows where the fraction of available data points (between 0 and 1) does not exceed this threshold.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>
@@ -923,6 +924,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size: int = 1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -953,6 +955,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/tft.py b/neuralforecast/models/tft.py
index 8d89322ee..ee7f0c2a3 100644
--- a/neuralforecast/models/tft.py
+++ b/neuralforecast/models/tft.py
@@ -406,6 +406,7 @@ class TFT(BaseWindows):
     `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.<br>
     `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, fraction of a window's length; windows whose share of available data points (checked on the input part and, when h > 0, on the forecast part) does not exceed this threshold are dropped.<br>
     `valid_batch_size`: int=None, number of different series in each validation and test batch.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
@@ -454,6 +455,7 @@ def __init__(
         windows_batch_size: int = 1024,
         inference_windows_batch_size: int = 1024,
         start_padding_enabled=False,
+        data_availability_threshold: float = 0.0,
         step_size: int = 1,
         scaler_type: str = "robust",
         num_workers_loader=0,
@@ -485,6 +487,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/tide.py b/neuralforecast/models/tide.py
index d7df58373..507e380dc 100644
--- a/neuralforecast/models/tide.py
+++ b/neuralforecast/models/tide.py
@@ -122,6 +122,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size=1024,
         start_padding_enabled=False,
+        data_availability_threshold=0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -154,6 +155,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             random_seed=random_seed,
diff --git a/neuralforecast/models/timellm.py b/neuralforecast/models/timellm.py
index a14381c53..fcbe81557 100644
--- a/neuralforecast/models/timellm.py
+++ b/neuralforecast/models/timellm.py
@@ -260,6 +260,7 @@ class TimeLLM(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, fraction of a window's length; windows whose share of available data points (checked on the input part and, when h > 0, on the forecast part) does not exceed this threshold are dropped.<br>
     `step_size`: int=1, step size between each window of temporal data.<br>
     `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>
@@ -316,6 +317,7 @@ def __init__(
         windows_batch_size: int = 1024,
         inference_windows_batch_size: int = 1024,
         start_padding_enabled: bool = False,
+        data_availability_threshold: float = 0.0,
         step_size: int = 1,
         num_lr_decays: int = 0,
         early_stop_patience_steps: int = -1,
@@ -347,6 +349,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/timesnet.py b/neuralforecast/models/timesnet.py
index 3e5a1f074..034358ad8 100644
--- a/neuralforecast/models/timesnet.py
+++ b/neuralforecast/models/timesnet.py
@@ -166,6 +166,8 @@ class TimesNet(BaseWindows):
         Number of windows to sample in each inference batch.
     start_padding_enabled : bool (default=False)
         If True, the model will pad the time series with zeros at the beginning by input size.
+    data_availability_threshold : float (default=0.0)
+        Fraction of a window's length; windows whose share of available data points (checked on the input part and, when h > 0, on the forecast part) does not exceed this threshold are dropped.<br>
     scaler_type : str (default='standard')
         Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     random_seed : int (default=1)
@@ -220,6 +222,7 @@ def __init__(
         windows_batch_size=64,
         inference_windows_batch_size=256,
         start_padding_enabled=False,
+        data_availability_threshold: float = 0.0,
         step_size: int = 1,
         scaler_type: str = "standard",
         random_seed: int = 1,
@@ -250,6 +253,7 @@ def __init__(
             valid_batch_size=valid_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,
diff --git a/neuralforecast/models/vanillatransformer.py b/neuralforecast/models/vanillatransformer.py
index 49d374c69..011d841b6 100644
--- a/neuralforecast/models/vanillatransformer.py
+++ b/neuralforecast/models/vanillatransformer.py
@@ -108,6 +108,7 @@ class VanillaTransformer(BaseWindows):
     `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>
     `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.<br>
     `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>
+    `data_availability_threshold`: float=0.0, fraction of a window's length; windows whose share of available data points (checked on the input part and, when h > 0, on the forecast part) does not exceed this threshold are dropped.<br>
     `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
     `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
     `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
@@ -156,6 +157,7 @@ def __init__(
         windows_batch_size=1024,
         inference_windows_batch_size: int = 1024,
         start_padding_enabled=False,
+        data_availability_threshold: float = 0.0,
         step_size: int = 1,
         scaler_type: str = "identity",
         random_seed: int = 1,
@@ -185,6 +187,7 @@ def __init__(
             windows_batch_size=windows_batch_size,
             inference_windows_batch_size=inference_windows_batch_size,
             start_padding_enabled=start_padding_enabled,
+            data_availability_threshold=data_availability_threshold,
             step_size=step_size,
             scaler_type=scaler_type,
             num_workers_loader=num_workers_loader,