From fd36ec959cbd82d1b6e65a121c4ebb93af9c2c52 Mon Sep 17 00:00:00 2001 From: Joost van Griethuysen Date: Fri, 24 Aug 2018 09:38:29 +0200 Subject: [PATCH] ENH: Implement dynamic binning Add the possibility to enable dynamic binning, which scales the bin width by the ratio between the range of intensities seen in the derived and original images (only including intensities in the ROI). This is only done if dynamic binning is enabled (new parameter `dynamicBinning`, boolean, default false) and no custom bin width has been defined for that filter. This also has no effect if a fixed bin count is used. Addresses issue #49. --- docs/customization.rst | 3 ++ radiomics/base.py | 3 +- radiomics/featureextractor.py | 9 ++++ radiomics/firstorder.py | 3 +- radiomics/imageoperations.py | 83 ++++++++++++++++++++---------- radiomics/schemas/paramSchema.yaml | 2 + 6 files changed, 75 insertions(+), 28 deletions(-) diff --git a/docs/customization.rst b/docs/customization.rst index f0f565f3..ff1c7be0 100644 --- a/docs/customization.rst +++ b/docs/customization.rst @@ -280,6 +280,9 @@ Feature Class Level - ``binCount`` [None]: integer, > 0, specifies the number of bins to create. The width of the bin is then determined by the range in the ROI. No definitive evidence is available on which method of discretization is superior, we advise a fixed bin width. See more :ref:`here `. +- ``dynamicBinning`` [False]: Boolean, if set to true, scales the bin width for derived images by the ratio of the range + in the image (ROI) and the range of the original image (ROI). This setting has no effect when a fixed bin count is + used, or when a custom bin width has been specified for the filter. See also :py:func:`getBinEdges()`. *Forced 2D extraction* diff --git a/radiomics/base.py b/radiomics/base.py index 2bbb1b64..b22d7226 100644 --- a/radiomics/base.py +++ b/radiomics/base.py @@ -180,7 +180,8 @@ def _applyBinning(self): self.matrix, self.binEdges = imageoperations.binImage(self.binWidth, self.imageArray, self.maskArray, - self.settings.get('binCount', None)) + self.settings.get('binCount', None), + self.settings.get('dynamic_ref_range', None)) self.coefficients['grayLevels'] = numpy.unique(self.matrix[self.maskArray]) self.coefficients['Ng'] = int(numpy.max(self.coefficients['grayLevels'])) # max gray level in the ROI diff --git a/radiomics/featureextractor.py b/radiomics/featureextractor.py index a0a123ca..3d2a4ad7 100644 --- a/radiomics/featureextractor.py +++ b/radiomics/featureextractor.py @@ -434,6 +434,13 @@ def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False): # Resegmentation successful mask = resegmentedMask + dynamic_ref_range = None + if self.settings.get('dynamicBinning', False): + im_arr = sitk.GetArrayFromImage(image) + ma_arr = sitk.GetArrayFromImage(mask) == self.settings.get('label', 1) + target_voxel_arr = im_arr[ma_arr] + dynamic_ref_range = max(target_voxel_arr) - min(target_voxel_arr) + # 6. Calculate other enabled feature classes using enabled image types # Make generators for all enabled image types self.logger.debug('Creating image type iterator') @@ -442,6 +449,8 @@ def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False): args = self.settings.copy() args.update(customKwargs) self.logger.info('Adding image type "%s" with custom settings: %s' % (imageType, str(customKwargs))) + if 'binWidth' not in customKwargs and imageType != 'Original': + args['dynamic_ref_range'] = dynamic_ref_range imageGenerators = chain(imageGenerators, getattr(imageoperations, 'get%sImage' % imageType)(image, mask, **args)) self.logger.debug('Extracting features') diff --git a/radiomics/firstorder.py b/radiomics/firstorder.py index 042ce2d5..845191f9 100644 --- a/radiomics/firstorder.py +++ b/radiomics/firstorder.py @@ -58,7 +58,8 @@ def _getDiscretizedTargetVoxelArray(self): if self.binCount is not None: binEdges = self.binCount else: - binEdges = imageoperations.getBinEdges(self.binWidth, self.targetVoxelArray) + binEdges = imageoperations.getBinEdges(self.binWidth, self.targetVoxelArray, + dynamic_ref_range=self.settings.get('dynamic_ref_range', None)) self.discretizedTargetVoxelArray = numpy.histogram(self.targetVoxelArray, binEdges)[0] diff --git a/radiomics/imageoperations.py b/radiomics/imageoperations.py index aa63d671..1053ece2 100644 --- a/radiomics/imageoperations.py +++ b/radiomics/imageoperations.py @@ -11,14 +11,46 @@ logger = logging.getLogger(__name__) -def getBinEdges(binwidth, parameterValues): +def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None): r""" Calculate and return the histogram using parameterValues (1D array of all segmented voxels in the image). Parameter ``binWidth`` determines the fixed width of each bin. This ensures comparable voxels after binning, a fixed bin count would be dependent on the intensity range in the segmentation. Returns the bin edges, a list of the edges of the calculated bins, length is N(bins) + 1. Bins are defined such, that - the bin edges are equally spaced from zero, and that the leftmost edge :math:`\leq \min(X_{gl})`. + the bin edges are equally spaced from zero, and that the leftmost edge :math:`\leq \min(X_{gl})`: + + .. math:: + X_{b, i} = \lfloor \frac{X_{gl, i}}{W} \rfloor - \lfloor \frac {\min(X_{gl})}{W} \rfloor + 1 + + Here, :math:`X_{gl, i}` and :math:`X_{b, i}` are gray level intensities before and after discretization, respectively. + :math:`{W}` is the bin width value (specfied in ``binWidth`` parameter). The first part of the formula ensures that + the bins are equally spaced from 0, whereas the second part ensures that the minimum gray level intensity inside the + ROI after binning is always 1. + + If the range of gray level intensities is equally dividable by the binWidth, i.e. :math:`(\max(X_{gl})- \min(X_{gl})) + \mod W = 0`, the maximum intensity will be encoded as numBins + 1, therefore the maximum number of gray + level intensities in the ROI after binning is number of bins + 1. + + If dynamic binning is enabled (parameter `dynamicBinning`), and no custom binwidth has been defined for the filter, + the actual bin width used (:math:`W_{dyn}`) is defined as: + + .. math:: + W_{dyn} = W * \frac{\max(X_{der})) - \min(X_{der}}{\max(X_{ref})) - \min(X_{ref}} + + Here, :math:`X_{der}` and :math:`X_{ref}` represent the intensities found in the ROI on the derived and original + images, respectively. + + .. warning:: + This is different from the assignment of voxels to the bins by ``numpy.histogram`` , which has half-open bins, with + the exception of the rightmost bin, which means this maximum values are assigned to the topmost bin. + ``numpy.digitize`` uses half-open bins, including the rightmost bin. + + .. note:: + This method is slightly different from the fixed bin size discretization method described by IBSI. The two most + notable differences are 1) that PyRadiomics uses a floor division (and adds 1), as opposed to a ceiling division and + 2) that in PyRadiomics, bins are always equally spaced from 0, as opposed to equally spaced from the minimum + gray level intensity. *Example: for a ROI with values ranging from 54 to 166, and a bin width of 25, the bin edges will be [50, 75, 100, 125, 150, 175].* @@ -33,10 +65,19 @@ def getBinEdges(binwidth, parameterValues): """ global logger + minimum = min(parameterValues) + maximum = max(parameterValues) + + if dynamic_ref_range > 0 and minimum < maximum: + range_scale = (maximum - minimum) / dynamic_ref_range + binwidth = binwidth * range_scale + logger.debug('Applied dynamic binning (reference range %g, current range %g), scaled bin width to %g', + dynamic_ref_range, maximum - minimum, binwidth) + # Start binning form the first value lesser than or equal to the minimum value and evenly dividable by binwidth - lowBound = min(parameterValues) - (min(parameterValues) % binwidth) + lowBound = minimum - (minimum % binwidth) # Add + binwidth to ensure the maximum value is included in the range generated by numpu.arange - highBound = max(parameterValues) + binwidth + highBound = maximum + binwidth binEdges = numpy.arange(lowBound, highBound, binwidth) @@ -52,33 +93,23 @@ def getBinEdges(binwidth, parameterValues): return binEdges # numpy.histogram(parameterValues, bins=binedges) -def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincount=None): +def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincount=None, dynamic_ref_range=None): r""" Discretizes the parameterMatrix (matrix representation of the gray levels in the ROI) using the binEdges calculated - using :py:func:`getBinEdges`. Only voxels defined by parameterMatrixCoordinates (defining the segmentation) are used - for calculation of histogram and subsequently discretized. Voxels outside segmentation are left unchanged. + using :py:func:`getBinEdges` or the number of bins specified in ``binCount``. Only voxels defined by + parameterMatrixCoordinates (defining the segmentation) are used for calculation of histogram and subsequently + discretized. Voxels outside segmentation are left unchanged. - :math:`X_{b, i} = \lfloor \frac{X_{gl, i}}{W} \rfloor - \lfloor \frac {\min(X_{gl})}{W} \rfloor + 1` + Fixed bin width: - Here, :math:`X_{gl, i}` and :math:`X_{b, i}` are gray level intensities before and after discretization, respectively. - :math:`{W}` is the bin width value (specfied in ``binWidth`` parameter). The first part of the formula ensures that - the bins are equally spaced from 0, whereas the second part ensures that the minimum gray level intensity inside the - ROI after binning is always 1. + see :py:func:`getBinEdges()` - If the range of gray level intensities is equally dividable by the binWidth, i.e. :math:`(\max(X_{gl})- \min(X_{gl})) - \mod W = 0`, the maximum intensity will be encoded as numBins + 1, therefore the maximum number of gray - level intensities in the ROI after binning is number of bins + 1. + Fixed bin Count: - .. warning:: - This is different from the assignment of voxels to the bins by ``numpy.histogram`` , which has half-open bins, with - the exception of the rightmost bin, which means this maximum values are assigned to the topmost bin. - ``numpy.digitize`` uses half-open bins, including the rightmost bin. + .. math:: + X_{b, i} = \lfloor N_b\frac{(X_{gl, i} - \min(X_{gl})}{\max(X_{gl})) - \min(X_{gl}} \rfloor + 1 - .. note:: - This method is slightly different from the fixed bin size discretization method described by IBSI. The two most - notable differences are 1) that PyRadiomics uses a floor division (and adds 1), as opposed to a ceiling division and - 2) that in PyRadiomics, bins are always equally spaced from 0, as opposed to equally spaced from the minimum - gray level intensity. + Here, :math:`N_b` is the number of bins to use, as defined in ``binCount``. """ global logger logger.debug('Discretizing gray levels inside ROI') @@ -87,13 +118,13 @@ def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincoun if bincount is not None: binEdges = numpy.histogram(parameterMatrix[:], bincount)[1] else: - binEdges = getBinEdges(binwidth, parameterMatrix[:]) + binEdges = getBinEdges(binwidth, parameterMatrix[:], dynamic_ref_range) parameterMatrix = numpy.digitize(parameterMatrix, binEdges) else: if bincount is not None: binEdges = numpy.histogram(parameterMatrix[parameterMatrixCoordinates], bincount)[1] else: - binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates]) + binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates], dynamic_ref_range) parameterMatrix[parameterMatrixCoordinates] = numpy.digitize(parameterMatrix[parameterMatrixCoordinates], binEdges) return parameterMatrix, binEdges diff --git a/radiomics/schemas/paramSchema.yaml b/radiomics/schemas/paramSchema.yaml index c309fd7e..f6a90455 100644 --- a/radiomics/schemas/paramSchema.yaml +++ b/radiomics/schemas/paramSchema.yaml @@ -33,6 +33,8 @@ mapping: type: int range: min-ex: 0 + dynamicBinning: + type: bool normalize: type: bool normalizeScale: