PennLINC · tsalo · Oct 11, 2023 · Sep 29, 2023 · Sep 29, 2023 · Sep 29, 2023
diff --git a/docs/workflows.rst b/docs/workflows.rst
@@ -277,13 +277,16 @@ Denoising
 =========
 :class:`~xcp_d.interfaces.nilearn.DenoiseNifti`, :class:`~xcp_d.interfaces.nilearn.DenoiseCifti`
 
-Temporal censoring
-------------------
+Temporal censoring [OPTIONAL]
+-----------------------------
 
 Prior to confound regression, high-motion volumes will be removed from the BOLD data.
 These volumes will also be removed from the nuisance regressors.
 Please refer to :footcite:t:`power2012spurious` for more information.
 
+.. tip::
+   Censoring can be disabled by setting ``--fd-thresh 0``.
+
 
 Confound regression
 -------------------
@@ -339,6 +342,18 @@ Interpolation
 
 An interpolated version of the ``denoised BOLD`` is then created by filling in the high-motion
 outlier volumes with cubic spline interpolated data, as implemented in ``Nilearn``.
+
+.. warning::
+   In versions 0.4.0rc2 - 0.5.0, XCP-D used cubic spline interpolation,
+   followed by bandpass filtering.
+
+   However, cubic spline interpolation can introduce large spikes and drops in the signal
+   when the censored volumes are at the beginning or end of the run,
+   which are then propagated to the filtered data.
+
+   To address this, XCP-D now replaces interpolated volumes at the edges of the run with the
+   closest non-outlier volume's data, as of 0.5.1.
+
 The resulting ``interpolated, denoised BOLD`` is primarily used for bandpass filtering.
 
 

diff --git a/xcp_d/tests/test_cli.py b/xcp_d/tests/test_cli.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from nipype import logging
 from pkg_resources import resource_filename as pkgrf
 
 from xcp_d.cli import combineqc, parser_utils, run
@@ -17,6 +18,8 @@
     run_command,
 )
 
+LOGGER = logging.getLogger("nipype.utils")
+
 
 @pytest.mark.ds001419_nifti
 def test_ds001419_nifti(data_dir, output_dir, working_dir):
@@ -210,6 +213,17 @@ def test_pnc_cifti(data_dir, output_dir, working_dir):
     test_data_dir = get_test_data_path()
     filter_file = os.path.join(test_data_dir, "pnc_cifti_filter.json")
 
+    # Make the last few volumes outliers to check https://github.com/PennLINC/xcp_d/issues/949
+    motion_file = os.path.join(
+        dataset_dir,
+        "sub-1648798153/ses-PNC1/func/"
+        "sub-1648798153_ses-PNC1_task-rest_acq-singleband_desc-confounds_timeseries.tsv",
+    )
+    motion_df = pd.read_table(motion_file)
+    motion_df.loc[56:, "trans_x"] = np.arange(1, 5) * 20
+    motion_df.to_csv(motion_file, sep="\t", index=False)
+    LOGGER.warning(f"Overwrote confounds file at {motion_file}.")
+
     parameters = [
         dataset_dir,
         out_dir,
@@ -218,6 +232,7 @@ def test_pnc_cifti(data_dir, output_dir, working_dir):
         "--nthreads=2",
         "--omp-nthreads=2",
         f"--bids-filter-file={filter_file}",
+        "--min-time=60",
         "--nuisance-regressors=acompcor_gsr",
         "--despike",
         "--head_radius=40",

diff --git a/xcp_d/tests/test_utils_utils.py b/xcp_d/tests/test_utils_utils.py
@@ -152,6 +152,37 @@ def test_denoise_with_nilearn(ds001419_data, tmp_path_factory):
     assert uncensored_denoised_bold.shape == (n_volumes, n_voxels)
     assert interpolated_filtered_bold.shape == (n_volumes, n_voxels)
 
+    # Ensure that interpolation + filtering doesn't cause problems at beginning/end of scan
+    # Create an updated censoring file with outliers at first and last two volumes
+    censoring_df = confounds_df[["framewise_displacement"]]
+    censoring_df["framewise_displacement"] = False
+    censoring_df.iloc[:2]["framewise_displacement"] = True
+    censoring_df.iloc[-2:]["framewise_displacement"] = True
+    n_censored_volumes = censoring_df["framewise_displacement"].sum()
+    assert n_censored_volumes == 4
+    temporal_mask = os.path.join(tmpdir, "censoring.tsv")
+    censoring_df.to_csv(temporal_mask, sep="\t", index=False)
+
+    # Run without denoising or filtering
+    _, interpolated_filtered_bold = utils.denoise_with_nilearn(
+        preprocessed_bold=preprocessed_bold_arr,
+        confounds_file=None,
+        temporal_mask=temporal_mask,
+        low_pass=None,
+        high_pass=None,
+        filter_order=0,
+        TR=TR,
+    )
+    assert interpolated_filtered_bold.shape == (n_volumes, n_voxels)
+    # The first two volumes should be the same as the third (first non-outlier) volume
+    assert np.array_equal(interpolated_filtered_bold[0, :], interpolated_filtered_bold[2, :])
+    assert np.array_equal(interpolated_filtered_bold[1, :], interpolated_filtered_bold[2, :])
+    assert not np.array_equal(interpolated_filtered_bold[2, :], interpolated_filtered_bold[3, :])
+    # The last volume should be the same as the third-to-last (last non-outlier) volume
+    assert np.array_equal(interpolated_filtered_bold[-1, :], interpolated_filtered_bold[-3, :])
+    assert np.array_equal(interpolated_filtered_bold[-2, :], interpolated_filtered_bold[-3, :])
+    assert not np.array_equal(interpolated_filtered_bold[-3, :], interpolated_filtered_bold[-4, :])
+
 
 def test_list_to_str():
     """Test the list_to_str function."""

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
@@ -463,6 +463,36 @@ def denoise_with_nilearn(
         sample_mask=sample_mask,
         t_r=TR,
     )
+    # Replace any high-motion volumes at the beginning or end of the run with the closest
+    # low-motion volume's data.
+    # From https://stackoverflow.com/a/48106843/2589328
+    outlier_idx = sorted(set(np.where(~sample_mask)[0]))
+    if outlier_idx:
+        gaps = [[s, e] for s, e in zip(outlier_idx, outlier_idx[1:]) if s + 1 < e]
+        edges = iter(outlier_idx[:1] + sum(gaps, []) + outlier_idx[-1:])
+        consecutive_outliers_idx = list(zip(edges, edges))
+        first_outliers = consecutive_outliers_idx[0]
+        last_outliers = consecutive_outliers_idx[-1]
+
+        # Replace outliers at beginning of run
+        if first_outliers[0] == 0:
+            LOGGER.warning(
+                f"Outlier volumes at beginning of run ({first_outliers[0]}-{first_outliers[1]}) "
+                "will be replaced with first non-outlier volume's values."
+            )
+            interpolated_unfiltered_bold[
+                : first_outliers[1] + 1, :
+            ] = interpolated_unfiltered_bold[first_outliers[1] + 1, :]
+
+        # Replace outliers at end of run
+        if last_outliers[1] == n_volumes - 1:
+            LOGGER.warning(
+                f"Outlier volumes at end of run ({last_outliers[0]}-{last_outliers[1]}) "
+                "will be replaced with last non-outlier volume's values."
+            )
+            interpolated_unfiltered_bold[last_outliers[0] :, :] = interpolated_unfiltered_bold[
+                last_outliers[0] - 1, :
+            ]
 
     # Now apply the bandpass filter to the interpolated, denoised data
     if low_pass is not None and high_pass is not None: