From c284f7485fcfc38e1d3182a4eb5499c2a0ccb5c0 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 2 Jul 2024 16:42:01 -0400 Subject: [PATCH 01/21] add some outlier detection utility functions --- src/stcal/outlier_detection/__init__.py | 0 src/stcal/outlier_detection/utils.py | 250 ++++++++++++++++++++++++ 2 files changed, 250 insertions(+) create mode 100644 src/stcal/outlier_detection/__init__.py create mode 100644 src/stcal/outlier_detection/utils.py diff --git a/src/stcal/outlier_detection/__init__.py b/src/stcal/outlier_detection/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py new file mode 100644 index 00000000..e4a91d78 --- /dev/null +++ b/src/stcal/outlier_detection/utils.py @@ -0,0 +1,250 @@ +""" +Utility functions for outlier detection routines +""" +import warnings + +import numpy as np +from astropy.stats import sigma_clip +from drizzle.cdrizzle import tblot +from scipy import ndimage +import gwcs + +from stcal.alignment.util import wcs_bbox_from_shape + +import logging +log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) + + +def compute_weight_threshold(weight, maskpt): + ''' + Compute the weight threshold for a single image or cube. + + Parameters + ---------- + weight : numpy.ndarray + The weight array + + maskpt : float + The percentage of the mean weight to use as a threshold for masking. + + Returns + ------- + float + The weight threshold for this integration. + ''' + + # necessary in order to assure that mask gets applied correctly + if hasattr(weight, '_mask'): + del weight._mask + mask_zero_weight = np.equal(weight, 0.) + mask_nans = np.isnan(weight) + # Combine the masks + weight_masked = np.ma.array(weight, mask=np.logical_or( + mask_zero_weight, mask_nans)) + # Sigma-clip the unmasked data + weight_masked = sigma_clip(weight_masked, sigma=3, maxiters=5) + mean_weight = np.mean(weight_masked) + # Mask pixels where weight falls below maskpt percent + weight_threshold = mean_weight * maskpt + return weight_threshold + + +def _abs_deriv(array): + """Take the absolute derivate of a numpy array.""" + tmp = np.zeros(array.shape, dtype=np.float64) + out = np.zeros(array.shape, dtype=np.float64) + + tmp[1:, :] = array[:-1, :] + tmp, out = _absolute_subtract(array, tmp, out) + tmp[:-1, :] = array[1:, :] + tmp, out = _absolute_subtract(array, tmp, out) + + tmp[:, 1:] = array[:, :-1] + tmp, out = _absolute_subtract(array, tmp, out) + tmp[:, :-1] = array[:, 1:] + tmp, out = _absolute_subtract(array, tmp, out) + + return out + + +def _absolute_subtract(array, tmp, out): + tmp = np.abs(array - tmp) + out = np.maximum(tmp, out) + tmp = tmp * 0. + return tmp, out + + +def flag_cr( + sci_data, + sci_err, + blot_data, + snr1, + snr2, + scale1, + scale2, + backg, + resample_data, +): + """Masks outliers in science image by updating DQ in-place + + Mask blemishes in dithered data by comparing a science image + with a model image and the derivative of the model image. + + Parameters + ---------- + + FIXME: update these + + sci_image : ~jwst.datamodels.ImageModel + the science data. Can also accept a CubeModel, but only if + resample_data is False + + blot_array : np.ndarray + the blotted median image of the dithered science frames. + + snr : str + Signal-to-noise ratio + + scale : str + scaling factor applied to the derivative + + backg : float + Background value (scalar) to subtract + + resample_data : bool + Boolean to indicate whether blot_image is created from resampled, + dithered data or not + + Notes + ----- + Accepting a CubeModel for sci_image and blot_image with resample_data=True + appears to be a relatively simple extension, as the only thing that explicitly + relies on the dimensionality is the kernel, which could be generalized. + However, this is not currently needed, as CubeModels are only passed in for + TSO data, where resampling is always False. + """ + err_data = np.nan_to_num(sci_err) + + # create the outlier mask + if resample_data: # dithered outlier detection + blot_deriv = _abs_deriv(blot_data) + diff_noise = np.abs(sci_data - blot_data - backg) + + # Create a boolean mask based on a scaled version of + # the derivative image (dealing with interpolating issues?) + # and the standard n*sigma above the noise + threshold1 = scale1 * blot_deriv + snr1 * err_data + mask1 = np.greater(diff_noise, threshold1) + + # Smooth the boolean mask with a 3x3 boxcar kernel + kernel = np.ones((3, 3), dtype=int) + mask1_smoothed = ndimage.convolve(mask1, kernel, mode='nearest') + + # Create a 2nd boolean mask based on the 2nd set of + # scale and threshold values + threshold2 = scale2 * blot_deriv + snr2 * err_data + mask2 = np.greater(diff_noise, threshold2) + + # Final boolean mask + cr_mask = mask1_smoothed & mask2 + + else: # stack outlier detection + diff_noise = np.abs(sci_data - blot_data) + + # straightforward detection of outliers for non-dithered data since + # err_data includes all noise sources (photon, read, and flat for baseline) + cr_mask = np.greater(diff_noise, snr1 * err_data) + + return cr_mask + + +# FIXME (or fixed) interp and sinscl were "options" only when provided +# as part of the step spec (which becomes outlierpars). As neither was +# in the spec (and providing unknown arguments causes an error), these +# were never configurable and always defaulted to linear and 1.0 +def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): + """ + Resample the output/resampled image to recreate an input image based on + the input image's world coordinate system + + Parameters + ---------- + median_model : `~stdatamodels.jwst.datamodels.JwstDataModel` + + blot_img : datamodel + Datamodel containing header and WCS to define the 'blotted' image + """ + # Compute the mapping between the input and output pixel coordinates + # TODO stcal.alignment.resample_utils.calc_pixmap does not work here + pixmap = calc_gwcs_pixmap(blot_wcs, median_wcs, blot_data.shape) + log.debug("Pixmap shape: {}".format(pixmap[:, :, 0].shape)) + log.debug("Sci shape: {}".format(blot_data.shape)) + log.info('Blotting {} <-- {}'.format(blot_data.shape, median_data.shape)) + + outsci = np.zeros(blot_data.shape, dtype=np.float32) + + # Currently tblot cannot handle nans in the pixmap, so we need to give some + # other value. -1 is not optimal and may have side effects. But this is + # what we've been doing up until now, so more investigation is needed + # before a change is made. Preferably, fix tblot in drizzle. + pixmap[np.isnan(pixmap)] = -1 + tblot(median_data, pixmap, outsci, scale=pix_ratio, kscale=1.0, + interp='linear', exptime=1.0, misval=0.0, sinscl=1.0) + + return outsci + + +def calc_gwcs_pixmap(in_wcs, out_wcs, shape): + """ Return a pixel grid map from input frame to output frame. + """ + bb = wcs_bbox_from_shape(shape) + log.debug("Bounding box from data shape: {}".format(bb)) + + grid = gwcs.wcstools.grid_from_bounding_box(bb) + # TODO does stcal reproject work? + pixmap = np.dstack(reproject(in_wcs, out_wcs)(grid[0], grid[1])) + + return pixmap + + +def reproject(wcs1, wcs2): + """ + Given two WCSs or transforms return a function which takes pixel + coordinates in the first WCS or transform and computes them in the second + one. It performs the forward transformation of ``wcs1`` followed by the + inverse of ``wcs2``. + + Parameters + ---------- + wcs1, wcs2 : `~astropy.wcs.WCS` or `~gwcs.wcs.WCS` + WCS objects that have `pixel_to_world_values` and `world_to_pixel_values` + methods. + + Returns + ------- + _reproject : func + Function to compute the transformations. It takes x, y + positions in ``wcs1`` and returns x, y positions in ``wcs2``. + """ + + try: + forward_transform = wcs1.pixel_to_world_values + backward_transform = wcs2.world_to_pixel_values + except AttributeError as err: + raise TypeError("Input should be a WCS") from err + + def _reproject(x, y): + sky = forward_transform(x, y) + flat_sky = [] + for axis in sky: + flat_sky.append(axis.flatten()) + # Filter out RuntimeWarnings due to computed NaNs in the WCS + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + det = backward_transform(*tuple(flat_sky)) + det_reshaped = [] + for axis in det: + det_reshaped.append(axis.reshape(x.shape)) + return tuple(det_reshaped) + return _reproject From ac93830d71d7b1cfd17db7f437f29f487abf280e Mon Sep 17 00:00:00 2001 From: Brett Date: Wed, 3 Jul 2024 13:56:13 -0400 Subject: [PATCH 02/21] add medfilt --- src/stcal/outlier_detection/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index e4a91d78..ef834402 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -7,6 +7,7 @@ from astropy.stats import sigma_clip from drizzle.cdrizzle import tblot from scipy import ndimage +from skimage.util import view_as_windows import gwcs from stcal.alignment.util import wcs_bbox_from_shape @@ -16,6 +17,14 @@ log.setLevel(logging.DEBUG) +def medfilt(arr, kern_size): + # scipy.signal.medfilt (and many other median filters) have undefined behavior + # for nan inputs. See: https://github.com/scipy/scipy/issues/4800 + padded = np.pad(arr, [[k // 2] for k in kern_size]) + windows = view_as_windows(padded, kern_size, np.ones(len(kern_size), dtype='int')) + return np.nanmedian(windows, axis=np.arange(-len(kern_size), 0)) + + def compute_weight_threshold(weight, maskpt): ''' Compute the weight threshold for a single image or cube. From 741edb327318a12abc0ae2a9573b94eb93790db0 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 8 Jul 2024 15:17:22 -0400 Subject: [PATCH 03/21] update deps devdeps --- pyproject.toml | 2 ++ tox.ini | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 7a16bc84..2058b5a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,9 @@ classifiers = [ ] dependencies = [ "astropy >=5.0.4", + "drizzle>=1.15.0", "scipy >=1.7.2", + "scikit-image>=0.19", "numpy >=1.21.2", "opencv-python-headless >=4.6.0.66", "asdf >=2.15.0", diff --git a/tox.ini b/tox.ini index cc514980..da995287 100644 --- a/tox.ini +++ b/tox.ini @@ -60,10 +60,14 @@ deps = oldestdeps: minimum_dependencies devdeps: numpy>=0.0.dev0 devdeps: scipy>=0.0.dev0 + devdeps: scikit-image>=0.0.dev0 devdeps: pyerfa>=0.0.dev0 devdeps: astropy>=0.0.dev0 devdeps: requests>=0.0.dev0 devdeps: tweakwcs @ git+https://github.com/spacetelescope/tweakwcs.git + devdeps: asdf @ git+https://github.com/asdf-format/asdf.git + devdeps: drizzle @ git+https://github.com/spacetelescope/drizzle.git + devdeps: gwcs @ git+https://github.com/spacetelescope/gwcs.git use_develop = true pass_env = CI From 546fd07caaebb08de02f8060499c4e11c0fa6bde Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 8 Jul 2024 16:42:33 -0400 Subject: [PATCH 04/21] WIP: adding unit tests --- src/stcal/outlier_detection/utils.py | 34 +++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index ef834402..20c0af7e 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -18,8 +18,23 @@ def medfilt(arr, kern_size): - # scipy.signal.medfilt (and many other median filters) have undefined behavior - # for nan inputs. See: https://github.com/scipy/scipy/issues/4800 + """ + scipy.signal.medfilt (and many other median filters) have undefined behavior + for nan inputs. See: https://github.com/scipy/scipy/issues/4800 + + Parameters + ---------- + arr : numpy.ndarray + The input array + + kern_size : list of int + List of kernel dimensions, length must be equal to arr.ndim. + + Returns + ------- + filtered_arr : numpy.ndarray + Input array median filtered with a kernel of size kern_size + """ padded = np.pad(arr, [[k // 2] for k in kern_size]) windows = view_as_windows(padded, kern_size, np.ones(len(kern_size), dtype='int')) return np.nanmedian(windows, axis=np.arange(-len(kern_size), 0)) @@ -42,7 +57,6 @@ def compute_weight_threshold(weight, maskpt): float The weight threshold for this integration. ''' - # necessary in order to assure that mask gets applied correctly if hasattr(weight, '_mask'): del weight._mask @@ -84,18 +98,20 @@ def _absolute_subtract(array, tmp, out): return tmp, out +# TODO add tests def flag_cr( sci_data, sci_err, blot_data, snr1, - snr2, - scale1, - scale2, - backg, + snr2, # FIXME: unused for resample_data=False + scale1, # FIXME: unused for resample_data=False + scale2, # FIXME: unused for resample_data=False + backg, # FIXME: unused for resample_data=False resample_data, ): - """Masks outliers in science image by updating DQ in-place + """ + Masks outliers in science image by updating DQ in-place Mask blemishes in dithered data by comparing a science image with a model image and the derivative of the model image. @@ -204,6 +220,7 @@ def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): return outsci +# TODO tests, duplicate in resample, resample_utils def calc_gwcs_pixmap(in_wcs, out_wcs, shape): """ Return a pixel grid map from input frame to output frame. """ @@ -217,6 +234,7 @@ def calc_gwcs_pixmap(in_wcs, out_wcs, shape): return pixmap +# TODO tests, duplicate in resample, assign_wcs, resample_utils def reproject(wcs1, wcs2): """ Given two WCSs or transforms return a function which takes pixel From 17631e586b967ff2855231e523aa731cf77075ea Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 8 Jul 2024 17:15:25 -0400 Subject: [PATCH 05/21] more tests and some renaming --- tests/outlier_detection/__init__.py | 0 tests/outlier_detection/utils.py | 95 +++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 tests/outlier_detection/__init__.py create mode 100644 tests/outlier_detection/utils.py diff --git a/tests/outlier_detection/__init__.py b/tests/outlier_detection/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/outlier_detection/utils.py b/tests/outlier_detection/utils.py new file mode 100644 index 00000000..c3522248 --- /dev/null +++ b/tests/outlier_detection/utils.py @@ -0,0 +1,95 @@ +import warnings + +import pytest +import numpy as np +import scipy.signal + +from stcal.outlier_detection.utils import ( + _abs_deriv, + compute_weight_threshold, + medfilt, +) + + +@pytest.mark.parametrize("shape,diff", [ + ([5, 5], 100), + ([7, 7], 200), +]) +def test_abs_deriv(shape, diff): + arr = np.zeros(shape) + # put diff at the center + np.put(arr, arr.size // 2, diff) + # since abs_deriv with a single non-zero value is the same as a + # convolution with a 3x3 cross kernel use it to test the result + expected = scipy.signal.convolve2d(arr, [[0, 1, 0], [1, 1, 1], [0, 1, 0]], mode='same') + result = _abs_deriv(arr) + np.testing.assert_allclose(result, expected) + + +@pytest.mark.parametrize("shape,mean,maskpt,expected", [ + ([5, 5], 11, 0.5, 5.5), + ([5, 5], 11, 0.25, 2.75), + ([3, 3, 3], 17, 0.5, 8.5), +]) +def test_compute_weight_threshold(shape, mean, maskpt, expected): + arr = np.ones(shape, dtype=np.float32) * mean + result = compute_weight_threshold(arr, maskpt) + np.testing.assert_allclose(result, expected) + + +def test_compute_weight_threshold_outlier(): + """ + Test that a large outlier doesn't bias the threshold + """ + arr = np.ones([7, 7, 7], dtype=np.float32) * 42 + arr[3, 3] = 9000 + result = compute_weight_threshold(arr, 0.5) + np.testing.assert_allclose(result, 21) + + +def test_compute_weight_threshold_zeros(): + """ + Test that zeros are ignored + """ + arr = np.zeros([10, 10], dtype=np.float32) + arr[:5, :5] = 42 + result = compute_weight_threshold(arr, 0.5) + np.testing.assert_allclose(result, 21) + + +@pytest.mark.parametrize("shape,kern_size", [ + ([7, 7], [3, 3]), + ([7, 7], [3, 1]), + ([7, 7], [1, 3]), + ([7, 5], [3, 3]), + ([5, 7], [3, 3]), + ([42, 42], [7, 7]), + ([42, 42], [7, 5]), + ([42, 42], [5, 7]), + ([42, 7, 5], [3, 3, 3]), + ([5, 7, 42], [5, 5, 5]), +]) +def test_medfilt_against_scipy(shape, kern_size): + arr = np.arange(np.prod(shape), dtype='uint32').reshape(shape) + result = medfilt(arr, kern_size) + + # The use of scipy.signal.medfilt is ok here ONLY because the + # input has no nans. See the medfilt docstring + expected = scipy.signal.medfilt(arr, kern_size) + + np.testing.assert_allclose(result, expected) + + +@pytest.mark.parametrize("arr,kern_size,expected", [ + ([2, np.nan, 0], [3], [1, 1, 0]), + ([np.nan, np.nan, np.nan], [3], [0, np.nan, 0]), +]) +def test_medfilt_nan(arr, kern_size, expected): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="All-NaN slice", + category=RuntimeWarning + ) + result = medfilt(arr, kern_size) + np.testing.assert_allclose(result, expected) From d9007b677b82ba47fb41bfe1a9a5826ef15282b2 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 08:47:14 -0400 Subject: [PATCH 06/21] renaming some functions --- src/stcal/outlier_detection/utils.py | 70 +++++++++++++++------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 20c0af7e..638f8790 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -75,6 +75,7 @@ def compute_weight_threshold(weight, maskpt): def _abs_deriv(array): """Take the absolute derivate of a numpy array.""" + # TODO is there a more efficient way to do this? tmp = np.zeros(array.shape, dtype=np.float64) out = np.zeros(array.shape, dtype=np.float64) @@ -99,15 +100,27 @@ def _absolute_subtract(array, tmp, out): # TODO add tests -def flag_cr( +def flag_crs( + sci_data, + sci_err, + blot_data, + snr, +): + # straightforward detection of outliers for non-dithered data since + # err_data includes all noise sources (photon, read, and flat for baseline) + return np.greater(np.abs(sci_data - blot_data), snr * np.nan_to_num(sci_err)) + + +# TODO add tests +def flag_resampled_crs( sci_data, sci_err, blot_data, snr1, - snr2, # FIXME: unused for resample_data=False - scale1, # FIXME: unused for resample_data=False - scale2, # FIXME: unused for resample_data=False - backg, # FIXME: unused for resample_data=False + snr2, + scale1, + scale2, + backg, resample_data, ): """ @@ -149,39 +162,31 @@ def flag_cr( However, this is not currently needed, as CubeModels are only passed in for TSO data, where resampling is always False. """ + if not resample_data: + return flag_crs(sci_data, sci_err, blot_data, snr1) err_data = np.nan_to_num(sci_err) - # create the outlier mask - if resample_data: # dithered outlier detection - blot_deriv = _abs_deriv(blot_data) - diff_noise = np.abs(sci_data - blot_data - backg) - - # Create a boolean mask based on a scaled version of - # the derivative image (dealing with interpolating issues?) - # and the standard n*sigma above the noise - threshold1 = scale1 * blot_deriv + snr1 * err_data - mask1 = np.greater(diff_noise, threshold1) - - # Smooth the boolean mask with a 3x3 boxcar kernel - kernel = np.ones((3, 3), dtype=int) - mask1_smoothed = ndimage.convolve(mask1, kernel, mode='nearest') - - # Create a 2nd boolean mask based on the 2nd set of - # scale and threshold values - threshold2 = scale2 * blot_deriv + snr2 * err_data - mask2 = np.greater(diff_noise, threshold2) + # TODO this could be optimized to not make as many temporary arrays... + blot_deriv = _abs_deriv(blot_data) + diff_noise = np.abs(sci_data - blot_data - backg) - # Final boolean mask - cr_mask = mask1_smoothed & mask2 + # Create a boolean mask based on a scaled version of + # the derivative image (dealing with interpolating issues?) + # and the standard n*sigma above the noise + threshold1 = scale1 * blot_deriv + snr1 * err_data + mask1 = np.greater(diff_noise, threshold1) - else: # stack outlier detection - diff_noise = np.abs(sci_data - blot_data) + # Smooth the boolean mask with a 3x3 boxcar kernel + kernel = np.ones((3, 3), dtype=int) + mask1_smoothed = ndimage.convolve(mask1, kernel, mode='nearest') - # straightforward detection of outliers for non-dithered data since - # err_data includes all noise sources (photon, read, and flat for baseline) - cr_mask = np.greater(diff_noise, snr1 * err_data) + # Create a 2nd boolean mask based on the 2nd set of + # scale and threshold values + threshold2 = scale2 * blot_deriv + snr2 * err_data + mask2 = np.greater(diff_noise, threshold2) - return cr_mask + # Final boolean mask + return mask1_smoothed & mask2 # FIXME (or fixed) interp and sinscl were "options" only when provided @@ -228,7 +233,6 @@ def calc_gwcs_pixmap(in_wcs, out_wcs, shape): log.debug("Bounding box from data shape: {}".format(bb)) grid = gwcs.wcstools.grid_from_bounding_box(bb) - # TODO does stcal reproject work? pixmap = np.dstack(reproject(in_wcs, out_wcs)(grid[0], grid[1])) return pixmap From a957d99d12a5d7a79e45759bf4cbc60b235a00fd Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 08:48:58 -0400 Subject: [PATCH 07/21] rename file --- tests/outlier_detection/{utils.py => test_utils.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/outlier_detection/{utils.py => test_utils.py} (100%) diff --git a/tests/outlier_detection/utils.py b/tests/outlier_detection/test_utils.py similarity index 100% rename from tests/outlier_detection/utils.py rename to tests/outlier_detection/test_utils.py From d7936bfc944f774a622914d738d9f6e753d9c6f2 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 10:13:04 -0400 Subject: [PATCH 08/21] change back to old abs_deriv --- src/stcal/outlier_detection/utils.py | 29 ++++++++++++++++++++++++--- tests/outlier_detection/test_utils.py | 16 +++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 638f8790..2e55fc12 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -73,9 +73,33 @@ def compute_weight_threshold(weight, maskpt): return weight_threshold -def _abs_deriv(array): +def _valid_abs_deriv(array): """Take the absolute derivate of a numpy array.""" - # TODO is there a more efficient way to do this? + out = np.zeros_like(array) # use same dtype as input + + # compute row-wise absolute diffference + d = np.abs(np.diff(array, axis=0)) + out[1:] = d # no need to do max yet + # since these are absolute differences |r0-r1| = |r1-r0| + # make a view of the target portion of the array + v = out[:-1] + # compute an in-place maximum + np.putmask(v, d > v, d) + + # compute col-wise absolute difference + d = np.abs(np.diff(array, axis=1)) + v = out[:, 1:] + np.putmask(v, d > v, d) + v = out[:, :-1] + np.putmask(v, d > v, d) + return out + + +def _abs_deriv(array): + # FIXME this assumes off-edge pixels are 0 + # FIXME this upcasts to float64 + # FIXME _valid_abs_deriv fixes the above issues and is more efficient + # but fixing the bugs will likely change the output tmp = np.zeros(array.shape, dtype=np.float64) out = np.zeros(array.shape, dtype=np.float64) @@ -166,7 +190,6 @@ def flag_resampled_crs( return flag_crs(sci_data, sci_err, blot_data, snr1) err_data = np.nan_to_num(sci_err) - # TODO this could be optimized to not make as many temporary arrays... blot_deriv = _abs_deriv(blot_data) diff_noise = np.abs(sci_data - blot_data - backg) diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index c3522248..6e030211 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -12,20 +12,28 @@ @pytest.mark.parametrize("shape,diff", [ - ([5, 5], 100), - ([7, 7], 200), + ([5, 7], 100), + ([17, 13], -200), ]) -def test_abs_deriv(shape, diff): +def test_abs_deriv_single_value(shape, diff): arr = np.zeros(shape) # put diff at the center np.put(arr, arr.size // 2, diff) # since abs_deriv with a single non-zero value is the same as a # convolution with a 3x3 cross kernel use it to test the result - expected = scipy.signal.convolve2d(arr, [[0, 1, 0], [1, 1, 1], [0, 1, 0]], mode='same') + expected = scipy.signal.convolve2d(np.abs(arr), [[0, 1, 0], [1, 1, 1], [0, 1, 0]], mode='same') result = _abs_deriv(arr) np.testing.assert_allclose(result, expected) +@pytest.mark.skip(reason="_abs_deriv has edge effects due to treating off-edge pixels as 0") +@pytest.mark.parametrize("nrows,ncols", [(5, 5), (7, 11), (17, 13)]) +def test_abs_deriv_range(nrows, ncols): + arr = np.arange(nrows * ncols).reshape(nrows, ncols) + result = _abs_deriv(arr) + np.testing.assert_allclose(result, ncols) + + @pytest.mark.parametrize("shape,mean,maskpt,expected", [ ([5, 5], 11, 0.5, 5.5), ([5, 5], 11, 0.25, 2.75), From a57d618f638425842b9b20c01b048f0d71489e94 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 20:02:23 -0400 Subject: [PATCH 09/21] more tests --- src/stcal/outlier_detection/utils.py | 42 +------------- tests/outlier_detection/test_utils.py | 82 ++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 40 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 2e55fc12..abebd013 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -73,32 +73,7 @@ def compute_weight_threshold(weight, maskpt): return weight_threshold -def _valid_abs_deriv(array): - """Take the absolute derivate of a numpy array.""" - out = np.zeros_like(array) # use same dtype as input - - # compute row-wise absolute diffference - d = np.abs(np.diff(array, axis=0)) - out[1:] = d # no need to do max yet - # since these are absolute differences |r0-r1| = |r1-r0| - # make a view of the target portion of the array - v = out[:-1] - # compute an in-place maximum - np.putmask(v, d > v, d) - - # compute col-wise absolute difference - d = np.abs(np.diff(array, axis=1)) - v = out[:, 1:] - np.putmask(v, d > v, d) - v = out[:, :-1] - np.putmask(v, d > v, d) - return out - - def _abs_deriv(array): - # FIXME this assumes off-edge pixels are 0 - # FIXME this upcasts to float64 - # FIXME _valid_abs_deriv fixes the above issues and is more efficient # but fixing the bugs will likely change the output tmp = np.zeros(array.shape, dtype=np.float64) out = np.zeros(array.shape, dtype=np.float64) @@ -123,7 +98,6 @@ def _absolute_subtract(array, tmp, out): return tmp, out -# TODO add tests def flag_crs( sci_data, sci_err, @@ -135,7 +109,6 @@ def flag_crs( return np.greater(np.abs(sci_data - blot_data), snr * np.nan_to_num(sci_err)) -# TODO add tests def flag_resampled_crs( sci_data, sci_err, @@ -212,10 +185,6 @@ def flag_resampled_crs( return mask1_smoothed & mask2 -# FIXME (or fixed) interp and sinscl were "options" only when provided -# as part of the step spec (which becomes outlierpars). As neither was -# in the spec (and providing unknown arguments causes an error), these -# were never configurable and always defaulted to linear and 1.0 def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): """ Resample the output/resampled image to recreate an input image based on @@ -229,7 +198,6 @@ def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): Datamodel containing header and WCS to define the 'blotted' image """ # Compute the mapping between the input and output pixel coordinates - # TODO stcal.alignment.resample_utils.calc_pixmap does not work here pixmap = calc_gwcs_pixmap(blot_wcs, median_wcs, blot_data.shape) log.debug("Pixmap shape: {}".format(pixmap[:, :, 0].shape)) log.debug("Sci shape: {}".format(blot_data.shape)) @@ -248,20 +216,16 @@ def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): return outsci -# TODO tests, duplicate in resample, resample_utils -def calc_gwcs_pixmap(in_wcs, out_wcs, shape): +def calc_gwcs_pixmap(in_wcs, out_wcs, in_shape): """ Return a pixel grid map from input frame to output frame. """ - bb = wcs_bbox_from_shape(shape) + bb = wcs_bbox_from_shape(in_shape) log.debug("Bounding box from data shape: {}".format(bb)) grid = gwcs.wcstools.grid_from_bounding_box(bb) - pixmap = np.dstack(reproject(in_wcs, out_wcs)(grid[0], grid[1])) - - return pixmap + return np.dstack(reproject(in_wcs, out_wcs)(grid[0], grid[1])) -# TODO tests, duplicate in resample, assign_wcs, resample_utils def reproject(wcs1, wcs2): """ Given two WCSs or transforms return a function which takes pixel diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index 6e030211..bcfdaf72 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -1,12 +1,19 @@ import warnings +import gwcs import pytest import numpy as np import scipy.signal +from astropy.modeling import models from stcal.outlier_detection.utils import ( _abs_deriv, compute_weight_threshold, + flag_crs, + flag_resampled_crs, + gwcs_blot, + calc_gwcs_pixmap, + reproject, medfilt, ) @@ -26,7 +33,7 @@ def test_abs_deriv_single_value(shape, diff): np.testing.assert_allclose(result, expected) -@pytest.mark.skip(reason="_abs_deriv has edge effects due to treating off-edge pixels as 0") +@pytest.mark.skip(reason="_abs_deriv has edge effects due to treating off-edge pixels as 0: see JP-3683") @pytest.mark.parametrize("nrows,ncols", [(5, 5), (7, 11), (17, 13)]) def test_abs_deriv_range(nrows, ncols): arr = np.arange(nrows * ncols).reshape(nrows, ncols) @@ -65,6 +72,79 @@ def test_compute_weight_threshold_zeros(): np.testing.assert_allclose(result, 21) +def test_flag_crs(): + sci = np.zeros((10, 10), dtype=np.float32) + err = np.ones_like(sci) + blot = np.zeros_like(sci) + # add a cr + sci[2, 3] = 10 + crs = flag_crs(sci, err, blot, 1) + ys, xs = np.where(crs) + np.testing.assert_equal(ys, 2) + np.testing.assert_equal(xs, 3) + + +def test_flag_resampled_crs(): + sci = np.zeros((10, 10), dtype=np.float32) + err = np.ones_like(sci) + blot = np.zeros_like(sci) + # add a cr + sci[2, 3] = 10 + + snr1, snr2 = 5, 4 + scale1, scale2 = 1.2, 0.7 + backg = 0.0 + resample = True + crs = flag_resampled_crs(sci, err, blot, snr1, snr2, scale1, scale2, backg, resample) + np.testing.assert_equal(ys, 2) + np.testing.assert_equal(xs, 3) + + +def test_gwcs_blot(): + # set up a very simple wcs that scales by 1x + output_frame = gwcs.Frame2D(name="world") + forward_transform = models.Scale(1) & models.Scale(1) + + median_data = np.arange(100, dtype=np.float32).reshape((10, 10)) + median_wcs = gwcs.WCS(forward_transform, output_frame=output_frame) + blot_data = np.zeros((5, 5), dtype=np.float32) + blot_wcs = gwcs.WCS(forward_transform, output_frame=output_frame) + pix_ratio = 1.0 + + blotted = gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio) + # since the median data is larger and the wcs are equivalent the blot + # will window the data to the shape of the blot data + assert blotted.shape == blot_data.shape + np.testing.assert_equal(blotted, median_data[:blot_data.shape[0], :blot_data.shape[1]]) + + +def test_calc_gwcs_pixmap(): + # generate 2 wcses with different scales + output_frame = gwcs.Frame2D(name="world") + in_transform = models.Scale(1) & models.Scale(1) + out_transform = models.Scale(2) & models.Scale(2) + in_wcs = gwcs.WCS(in_transform, output_frame=output_frame) + out_wcs = gwcs.WCS(out_transform, output_frame=output_frame) + in_shape = (3, 4) + pixmap = calc_gwcs_pixmap(in_wcs, out_wcs, in_shape) + # we expect given the 2x scale difference to have a pixmap + # with pixel coordinates / 2 + # use mgrid to generate these coordinates (and reshuffle to match the pixmap) + expected = np.swapaxes(np.mgrid[:4, :3] / 2., 0, 2) + np.testing.assert_equal(pixmap, expected) + + +def test_reproject(): + # generate 2 wcses with different scales + output_frame = gwcs.Frame2D(name="world") + wcs1 = gwcs.WCS(models.Scale(1) & models.Scale(1), output_frame=output_frame) + wcs2 = gwcs.WCS(models.Scale(2) & models.Scale(2), output_frame=output_frame) + project = reproject(wcs1, wcs2) + pys, pxs = project(np.array([3]), np.array([1])) + np.testing.assert_equal(pys, 1.5) + np.testing.assert_equal(pxs, 0.5) + + @pytest.mark.parametrize("shape,kern_size", [ ([7, 7], [3, 3]), ([7, 7], [3, 1]), From 6080156efd79596b451e0c9f62e4130c952137e3 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 20:11:02 -0400 Subject: [PATCH 10/21] fix test --- tests/outlier_detection/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index bcfdaf72..963ebb7d 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -96,6 +96,7 @@ def test_flag_resampled_crs(): backg = 0.0 resample = True crs = flag_resampled_crs(sci, err, blot, snr1, snr2, scale1, scale2, backg, resample) + ys, xs = np.where(crs) np.testing.assert_equal(ys, 2) np.testing.assert_equal(xs, 3) From 252ee7cbabe1a44ff0a5026345d684b6f229c421 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 20:12:11 -0400 Subject: [PATCH 11/21] add changelog --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5203efd1..f0e59c85 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,7 +9,8 @@ General Changes to API -------------- -- +- Add ``outlier_detection`` submodule with ``utils`` included + from jwst. [#270] Bug Fixes --------- From 63b84f048b4a6789bf17a6ec5bebf985b9a7d1da Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 20:14:24 -0400 Subject: [PATCH 12/21] add docs --- docs/stcal/outlier_detection/description.rst | 4 ++++ docs/stcal/outlier_detection/index.rst | 12 ++++++++++++ docs/stcal/package_index.rst | 1 + 3 files changed, 17 insertions(+) create mode 100644 docs/stcal/outlier_detection/description.rst create mode 100644 docs/stcal/outlier_detection/index.rst diff --git a/docs/stcal/outlier_detection/description.rst b/docs/stcal/outlier_detection/description.rst new file mode 100644 index 00000000..5299257b --- /dev/null +++ b/docs/stcal/outlier_detection/description.rst @@ -0,0 +1,4 @@ +Description +============ + +This sub-package contains functions useful for outlier detection. diff --git a/docs/stcal/outlier_detection/index.rst b/docs/stcal/outlier_detection/index.rst new file mode 100644 index 00000000..e3deaa59 --- /dev/null +++ b/docs/stcal/outlier_detection/index.rst @@ -0,0 +1,12 @@ +.. _outlier_detection: + +======================= +Outlier Detection Utils +======================= + +.. toctree:: + :maxdepth: 2 + + description.rst + +.. automodapi:: stcal.outlier_detection diff --git a/docs/stcal/package_index.rst b/docs/stcal/package_index.rst index 5af815e2..e1db02b0 100644 --- a/docs/stcal/package_index.rst +++ b/docs/stcal/package_index.rst @@ -8,3 +8,4 @@ Package Index ramp_fitting/index.rst alignment/index.rst tweakreg/index.rst + outlier_detection/index.rst From fd738a83d21a5f2bc861ae749fe68b3474a43dec Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 20:37:40 -0400 Subject: [PATCH 13/21] update docstrings --- src/stcal/outlier_detection/utils.py | 138 ++++++++++++++++++-------- tests/outlier_detection/test_utils.py | 8 +- 2 files changed, 102 insertions(+), 44 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index abebd013..d75d7875 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -104,8 +104,29 @@ def flag_crs( blot_data, snr, ): - # straightforward detection of outliers for non-dithered data since - # err_data includes all noise sources (photon, read, and flat for baseline) + """ + Straightforward detection of outliers for non-dithered data since + err_data includes all noise sources (photon, read, and flat for baseline). + + Parameters + ---------- + sci_data : numpy.ndarray + "Science" data possibly containing outliers. + + sci_err : numpy.ndarray + Error estimates for sci_data. + + blot_data : numpy.ndarray + Reference data used to detect outliers. + + snr : float + Signal-to-noise ratio used during detection. + + Returns + ------- + cr_mask ; numpy.ndarray + Boolean array where outliers (CRs) are true. + """ return np.greater(np.abs(sci_data - blot_data), snr * np.nan_to_num(sci_err)) @@ -121,43 +142,44 @@ def flag_resampled_crs( resample_data, ): """ - Masks outliers in science image by updating DQ in-place - - Mask blemishes in dithered data by comparing a science image - with a model image and the derivative of the model image. + Detect outliers (CRs) using resampled reference data. Parameters ---------- - FIXME: update these + sci_data : numpy.ndarray + "Science" data possibly containing outliers + + sci_err : numpy.ndarray + Error estimates for sci_data - sci_image : ~jwst.datamodels.ImageModel - the science data. Can also accept a CubeModel, but only if - resample_data is False + blot_data : numpy.ndarray + Reference data used to detect outliers. If this reference data + was generated through resampling resample_data should be True. - blot_array : np.ndarray - the blotted median image of the dithered science frames. + snr1 : float + Signal-to-noise ratio threshold used prior to smoothing. - snr : str - Signal-to-noise ratio + snr2 : float + Signal-to-noise ratio threshold used after smoothing. - scale : str - scaling factor applied to the derivative + scale1 : float + Scale used prior to smoothing. + + scale2 : float + Scale used after smoothing. backg : float - Background value (scalar) to subtract + Scalar background to subtract from the difference. resample_data : bool - Boolean to indicate whether blot_image is created from resampled, - dithered data or not - - Notes - ----- - Accepting a CubeModel for sci_image and blot_image with resample_data=True - appears to be a relatively simple extension, as the only thing that explicitly - relies on the dimensionality is the kernel, which could be generalized. - However, this is not currently needed, as CubeModels are only passed in for - TSO data, where resampling is always False. + If False skip the resampled data algorithm and instead call + `flag_crs` with a limited set of these parameters. + + Returns + ------- + cr_mask ; numpy.ndarray + boolean array where outliers (CRs) are true """ if not resample_data: return flag_crs(sci_data, sci_err, blot_data, snr1) @@ -185,25 +207,43 @@ def flag_resampled_crs( return mask1_smoothed & mask2 -def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): +def gwcs_blot(median_data, median_wcs, blot_shape, blot_wcs, pix_ratio): """ - Resample the output/resampled image to recreate an input image based on - the input image's world coordinate system + Resample the median data to recreate an input image based on + the blot wcs. Parameters ---------- - median_model : `~stdatamodels.jwst.datamodels.JwstDataModel` + median_data : numpy.ndarray + The data to blot. + + median_wcs : gwcs.WCS + The wcs for the median data. + + blot_shape : sequence of ints + The target blot data shape. + + blot_wcs : gwcs.WCS + The target/blotted wcs. + + pix_ratio : float + Pixel ratio. + + Returns + ------- + blotted : numpy.ndarray + The blotted median data. blot_img : datamodel Datamodel containing header and WCS to define the 'blotted' image """ # Compute the mapping between the input and output pixel coordinates - pixmap = calc_gwcs_pixmap(blot_wcs, median_wcs, blot_data.shape) + pixmap = calc_gwcs_pixmap(blot_wcs, median_wcs, blot_shape) log.debug("Pixmap shape: {}".format(pixmap[:, :, 0].shape)) - log.debug("Sci shape: {}".format(blot_data.shape)) - log.info('Blotting {} <-- {}'.format(blot_data.shape, median_data.shape)) + log.debug("Sci shape: {}".format(blot_shape)) + log.info('Blotting {} <-- {}'.format(blot_shape, median_data.shape)) - outsci = np.zeros(blot_data.shape, dtype=np.float32) + outsci = np.zeros(blot_shape, dtype=np.float32) # Currently tblot cannot handle nans in the pixmap, so we need to give some # other value. -1 is not optimal and may have side effects. But this is @@ -217,7 +257,24 @@ def gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio): def calc_gwcs_pixmap(in_wcs, out_wcs, in_shape): - """ Return a pixel grid map from input frame to output frame. + """ + Return a pixel grid map from input frame to output frame. + + Parameters + ---------- + in_wcs : gwcs.WCS + Input/source wcs. + + out_wcs : gwcs.WCS + Output/projected wcs. + + in_shape : sequence of ints + Input shape used to compute the input bounding box. + + Returns + ------- + pixmap : numpy.ndarray + Computed pixmap. """ bb = wcs_bbox_from_shape(in_shape) log.debug("Bounding box from data shape: {}".format(bb)) @@ -228,14 +285,15 @@ def calc_gwcs_pixmap(in_wcs, out_wcs, in_shape): def reproject(wcs1, wcs2): """ - Given two WCSs or transforms return a function which takes pixel - coordinates in the first WCS or transform and computes them in the second - one. It performs the forward transformation of ``wcs1`` followed by the + Given two WCSs return a function which takes pixel + coordinates in wcs1 and computes them in wcs2. + + It performs the forward transformation of ``wcs1`` followed by the inverse of ``wcs2``. Parameters ---------- - wcs1, wcs2 : `~astropy.wcs.WCS` or `~gwcs.wcs.WCS` + wcs1, wcs2 : gwcs.WCS WCS objects that have `pixel_to_world_values` and `world_to_pixel_values` methods. diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index 963ebb7d..60151986 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -108,15 +108,15 @@ def test_gwcs_blot(): median_data = np.arange(100, dtype=np.float32).reshape((10, 10)) median_wcs = gwcs.WCS(forward_transform, output_frame=output_frame) - blot_data = np.zeros((5, 5), dtype=np.float32) + blot_shape = (5, 5) blot_wcs = gwcs.WCS(forward_transform, output_frame=output_frame) pix_ratio = 1.0 - blotted = gwcs_blot(median_data, median_wcs, blot_data, blot_wcs, pix_ratio) + blotted = gwcs_blot(median_data, median_wcs, blot_shape, blot_wcs, pix_ratio) # since the median data is larger and the wcs are equivalent the blot # will window the data to the shape of the blot data - assert blotted.shape == blot_data.shape - np.testing.assert_equal(blotted, median_data[:blot_data.shape[0], :blot_data.shape[1]]) + assert blotted.shape == blot_shape + np.testing.assert_equal(blotted, median_data[:blot_shape[0], :blot_shape[1]]) def test_calc_gwcs_pixmap(): From fc59880ada650e4c404778adfc43a3fad37b8c57 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 9 Jul 2024 20:47:42 -0400 Subject: [PATCH 14/21] docs fixes --- docs/stcal/outlier_detection/index.rst | 2 +- src/stcal/outlier_detection/utils.py | 31 +++++++++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/docs/stcal/outlier_detection/index.rst b/docs/stcal/outlier_detection/index.rst index e3deaa59..7c3bac07 100644 --- a/docs/stcal/outlier_detection/index.rst +++ b/docs/stcal/outlier_detection/index.rst @@ -9,4 +9,4 @@ Outlier Detection Utils description.rst -.. automodapi:: stcal.outlier_detection +.. automodapi:: stcal.outlier_detection.utils diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index d75d7875..5505fda1 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -17,6 +17,17 @@ log.setLevel(logging.DEBUG) +__all__ = [ + "medfilt", + "compute_weight_threshold", + "flag_crs", + "flag_resampled_crs", + "gwcs_blot", + "calc_gwcs_pixmap", + "reproject", +] + + def medfilt(arr, kern_size): """ scipy.signal.medfilt (and many other median filters) have undefined behavior @@ -124,7 +135,7 @@ def flag_crs( Returns ------- - cr_mask ; numpy.ndarray + cr_mask : numpy.ndarray Boolean array where outliers (CRs) are true. """ return np.greater(np.abs(sci_data - blot_data), snr * np.nan_to_num(sci_err)) @@ -178,7 +189,7 @@ def flag_resampled_crs( Returns ------- - cr_mask ; numpy.ndarray + cr_mask : numpy.ndarray boolean array where outliers (CRs) are true """ if not resample_data: @@ -217,13 +228,13 @@ def gwcs_blot(median_data, median_wcs, blot_shape, blot_wcs, pix_ratio): median_data : numpy.ndarray The data to blot. - median_wcs : gwcs.WCS + median_wcs : gwcs.wcs.WCS The wcs for the median data. - blot_shape : sequence of ints + blot_shape : list of int The target blot data shape. - blot_wcs : gwcs.WCS + blot_wcs : gwcs.wcs.WCS The target/blotted wcs. pix_ratio : float @@ -262,13 +273,13 @@ def calc_gwcs_pixmap(in_wcs, out_wcs, in_shape): Parameters ---------- - in_wcs : gwcs.WCS + in_wcs : gwcs.wcs.WCS Input/source wcs. - out_wcs : gwcs.WCS + out_wcs : gwcs.wcs.WCS Output/projected wcs. - in_shape : sequence of ints + in_shape : list of int Input shape used to compute the input bounding box. Returns @@ -293,13 +304,13 @@ def reproject(wcs1, wcs2): Parameters ---------- - wcs1, wcs2 : gwcs.WCS + wcs1, wcs2 : gwcs.wcs.WCS WCS objects that have `pixel_to_world_values` and `world_to_pixel_values` methods. Returns ------- - _reproject : func + _reproject : Function to compute the transformations. It takes x, y positions in ``wcs1`` and returns x, y positions in ``wcs2``. """ From f3d34c20a289aa5f2f9172b15b2d33a2e63e8a7f Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 11 Jul 2024 16:31:19 -0400 Subject: [PATCH 15/21] remove partial comment --- src/stcal/outlier_detection/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 5505fda1..0bb727c6 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -85,7 +85,6 @@ def compute_weight_threshold(weight, maskpt): def _abs_deriv(array): - # but fixing the bugs will likely change the output tmp = np.zeros(array.shape, dtype=np.float64) out = np.zeros(array.shape, dtype=np.float64) From 4ebbeecf38e92a440c18df137ea1befcc4440e49 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 11 Jul 2024 16:32:48 -0400 Subject: [PATCH 16/21] fix docstring --- src/stcal/outlier_detection/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 0bb727c6..de75bf6f 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -116,7 +116,7 @@ def flag_crs( ): """ Straightforward detection of outliers for non-dithered data since - err_data includes all noise sources (photon, read, and flat for baseline). + sci_err includes all noise sources (photon, read, and flat for baseline). Parameters ---------- From 20207b101be16dd060caa88d1ffc75998375fcbc Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 18 Jul 2024 13:10:05 -0400 Subject: [PATCH 17/21] remove warning filter Tests were unable to produce a situation where the filtered warning could be produced. The goal was to add a more specific filter (one that uses a message). If the warning can be reproduced the filter with a message can be re-added. It's possible this warning is no longer produced and so this commit removes the filter. --- src/stcal/outlier_detection/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index de75bf6f..7a7e00f4 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -325,10 +325,7 @@ def _reproject(x, y): flat_sky = [] for axis in sky: flat_sky.append(axis.flatten()) - # Filter out RuntimeWarnings due to computed NaNs in the WCS - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - det = backward_transform(*tuple(flat_sky)) + det = backward_transform(*tuple(flat_sky)) det_reshaped = [] for axis in det: det_reshaped.append(axis.reshape(x.shape)) From 61aeee8a86fe01b7f651d3ece2caf60a885e533a Mon Sep 17 00:00:00 2001 From: Brett Date: Fri, 26 Jul 2024 08:53:53 -0400 Subject: [PATCH 18/21] remove resample_data boolean argument --- src/stcal/outlier_detection/utils.py | 10 +--------- tests/outlier_detection/test_utils.py | 3 +-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 7a7e00f4..0cb9dd33 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -149,7 +149,6 @@ def flag_resampled_crs( scale1, scale2, backg, - resample_data, ): """ Detect outliers (CRs) using resampled reference data. @@ -164,8 +163,7 @@ def flag_resampled_crs( Error estimates for sci_data blot_data : numpy.ndarray - Reference data used to detect outliers. If this reference data - was generated through resampling resample_data should be True. + Reference data used to detect outliers. snr1 : float Signal-to-noise ratio threshold used prior to smoothing. @@ -182,17 +180,11 @@ def flag_resampled_crs( backg : float Scalar background to subtract from the difference. - resample_data : bool - If False skip the resampled data algorithm and instead call - `flag_crs` with a limited set of these parameters. - Returns ------- cr_mask : numpy.ndarray boolean array where outliers (CRs) are true """ - if not resample_data: - return flag_crs(sci_data, sci_err, blot_data, snr1) err_data = np.nan_to_num(sci_err) blot_deriv = _abs_deriv(blot_data) diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index 60151986..c602a89c 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -94,8 +94,7 @@ def test_flag_resampled_crs(): snr1, snr2 = 5, 4 scale1, scale2 = 1.2, 0.7 backg = 0.0 - resample = True - crs = flag_resampled_crs(sci, err, blot, snr1, snr2, scale1, scale2, backg, resample) + crs = flag_resampled_crs(sci, err, blot, snr1, snr2, scale1, scale2, backg) ys, xs = np.where(crs) np.testing.assert_equal(ys, 2) np.testing.assert_equal(xs, 3) From 573b5d9f46d2fdfdc44f5884b9dde7e575b2bd1f Mon Sep 17 00:00:00 2001 From: Brett Date: Fri, 26 Jul 2024 08:58:28 -0400 Subject: [PATCH 19/21] add mypy exclusion for drizzle --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2058b5a7..978e98fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -211,6 +211,7 @@ module = [ "stdatamodels.*", "asdf.*", "scipy.*", + "drizzle.*", # don't complain about the installed c parts of this library "stcal.ramp_fitting.ols_cas22._fit", "stcal.ramp_fitting.ols_cas22._jump", From aa64f1d6b61de31e8ad490efabda9a61fa3f89a3 Mon Sep 17 00:00:00 2001 From: Brett Date: Fri, 26 Jul 2024 09:03:55 -0400 Subject: [PATCH 20/21] copy docstring from jwst --- src/stcal/outlier_detection/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 0cb9dd33..85bd89ce 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -85,6 +85,7 @@ def compute_weight_threshold(weight, maskpt): def _abs_deriv(array): + """Take the absolute derivate of a numpy array.""" tmp = np.zeros(array.shape, dtype=np.float64) out = np.zeros(array.shape, dtype=np.float64) From 2112773e603fac985c28f603eb6cc1bdd168adf3 Mon Sep 17 00:00:00 2001 From: Brett Date: Fri, 26 Jul 2024 09:12:44 -0400 Subject: [PATCH 21/21] update docstrings --- src/stcal/outlier_detection/utils.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 85bd89ce..964de5fe 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -85,7 +85,15 @@ def compute_weight_threshold(weight, maskpt): def _abs_deriv(array): - """Take the absolute derivate of a numpy array.""" + """ + Do not use this function. + + Take the absolute derivative of a numpy array. + + This function assumes off-edge pixel values are 0 + and leads to erroneous derivative values and should + likely not be used. + """ tmp = np.zeros(array.shape, dtype=np.float64) out = np.zeros(array.shape, dtype=np.float64) @@ -103,6 +111,11 @@ def _abs_deriv(array): def _absolute_subtract(array, tmp, out): + """ + Do not use this function. + + A helper function for _abs_deriv. + """ tmp = np.abs(array - tmp) out = np.maximum(tmp, out) tmp = tmp * 0.