From 73694988d3a45c074a7986aa4df1d212cae5f7ed Mon Sep 17 00:00:00 2001 From: Joost van Griethuysen Date: Fri, 24 Aug 2018 11:39:05 +0200 Subject: [PATCH] ENH: Move application of bin count to getBinEdges Additionally, add +1 to uppermost bin edge to ensure it gets discretized to the topmost bin in numpy.digitize (instead of topmost + 1). --- radiomics/imageoperations.py | 88 +++++++++++++++++------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/radiomics/imageoperations.py b/radiomics/imageoperations.py index 0722acc3..457c8dd0 100644 --- a/radiomics/imageoperations.py +++ b/radiomics/imageoperations.py @@ -11,11 +11,11 @@ logger = logging.getLogger(__name__) -def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None): +def getBinEdges(binwidth, parameterValues, binCount=None, dynamic_ref_range=None): r""" - Calculate and return the histogram using parameterValues (1D array of all segmented voxels in the image). Parameter - ``binWidth`` determines the fixed width of each bin. This ensures comparable voxels after binning, a fixed bin count - would be dependent on the intensity range in the segmentation. + Calculate and return the histogram using parameterValues (1D array of all segmented voxels in the image). + + **Fixed bin width:** Returns the bin edges, a list of the edges of the calculated bins, length is N(bins) + 1. Bins are defined such, that the bin edges are equally spaced from zero, and that the leftmost edge :math:`\leq \min(X_{gl})`: @@ -36,7 +36,7 @@ def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None): the actual bin width used (:math:`W_{dyn}`) is defined as: .. math:: - W_{dyn} = W * \frac{\max(X_{der})) - \min(X_{der}}{\max(X_{ref})) - \min(X_{ref}} + W_{dyn} = W * \frac{\max(X_{der}) - \min(X_{der})}{\max(X_{ref}) - \min(X_{ref})} Here, :math:`X_{der}` and :math:`X_{ref}` represent the intensities found in the ROI on the derived and original images, respectively. @@ -58,6 +58,16 @@ def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None): This value can be directly passed to ``numpy.histogram`` to generate a histogram or ``numpy.digitize`` to discretize the ROI gray values. See also :py:func:`binImage()`. + **Fixed bin Count:** + + .. math:: + X_{b, i} = \left\{ {\begin{array}{lcl} + \lfloor N_b\frac{(X_{gl, i} - \min(X_{gl})}{\max(X_{gl}) - \min(X_{gl})} \rfloor + 1 & + \mbox{for} & X_{gl, i} < \max(X_{gl}) \\ + N_b & \mbox{for} & X_{gl, i} = \max(X_{gl}) \end{array}} \right. + + Here, :math:`N_b` is the number of bins to use, as defined in ``binCount``. + References - Leijenaar RTH, Nalbantov G, Carvalho S, et al. The effect of SUV discretization in quantitative FDG-PET Radiomics: @@ -65,30 +75,34 @@ def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None): """ global logger - minimum = min(parameterValues) - maximum = max(parameterValues) + if binCount is not None: + binEdges = numpy.histogram(parameterValues, binCount)[1] + binEdges[-1] += 1 # Ensures that the maximum value is included in the topmost bin when using numpy.digitize + else: + minimum = min(parameterValues) + maximum = max(parameterValues) - if dynamic_ref_range is not None and dynamic_ref_range > 0 and minimum < maximum: - range_scale = (maximum - minimum) / dynamic_ref_range - binwidth = binwidth * range_scale - logger.debug('Applied dynamic binning (reference range %g, current range %g), scaled bin width to %g', - dynamic_ref_range, maximum - minimum, binwidth) + if dynamic_ref_range is not None and dynamic_ref_range > 0 and minimum < maximum: + range_scale = (maximum - minimum) / dynamic_ref_range + binwidth = binwidth * range_scale + logger.debug('Applied dynamic binning (reference range %g, current range %g), scaled bin width to %g', + dynamic_ref_range, maximum - minimum, binwidth) - # Start binning form the first value lesser than or equal to the minimum value and evenly dividable by binwidth - lowBound = minimum - (minimum % binwidth) - # Add + binwidth to ensure the maximum value is included in the range generated by numpu.arange - highBound = maximum + binwidth + # Start binning form the first value lesser than or equal to the minimum value and evenly dividable by binwidth + lowBound = minimum - (minimum % binwidth) + # Add + binwidth to ensure the maximum value is included in the range generated by numpy.arange + highBound = maximum + binwidth - binEdges = numpy.arange(lowBound, highBound, binwidth) + binEdges = numpy.arange(lowBound, highBound, binwidth) - # if min(parameterValues) % binWidth = 0 and min(parameterValues) = max(parameterValues), binEdges will only contain - # 1 value. If this is the case (flat region) ensure that numpy.histogram creates 1 bin (requires 2 edges). For - # numpy.histogram, a binCount (1) would also suffice, however, this is not accepted by numpy.digitize, which also uses - # binEdges calculated by this function. - if len(binEdges) == 1: # Flat region, ensure that there is 1 bin - binEdges = [binEdges[0] - .5, binEdges[0] + .5] # Simulates binEdges returned by numpy.histogram if bins = 1 + # if min(parameterValues) % binWidth = 0 and min(parameterValues) = max(parameterValues), binEdges will only contain + # 1 value. If this is the case (flat region) ensure that numpy.histogram creates 1 bin (requires 2 edges). For + # numpy.histogram, a binCount (1) would also suffice, however, this is not accepted by numpy.digitize, which also uses + # binEdges calculated by this function. + if len(binEdges) == 1: # Flat region, ensure that there is 1 bin + binEdges = [binEdges[0] - .5, binEdges[0] + .5] # Simulates binEdges returned by numpy.histogram if bins = 1 - logger.debug('Calculated %d bins for bin width %g with edges: %s)', len(binEdges) - 1, binwidth, binEdges) + logger.debug('Calculated %d bins for bin width %g with edges: %s)', len(binEdges) - 1, binwidth, binEdges) return binEdges # numpy.histogram(parameterValues, bins=binedges) @@ -96,35 +110,17 @@ def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None): def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincount=None, dynamic_ref_range=None): r""" Discretizes the parameterMatrix (matrix representation of the gray levels in the ROI) using the binEdges calculated - using :py:func:`getBinEdges` or the number of bins specified in ``binCount``. Only voxels defined by - parameterMatrixCoordinates (defining the segmentation) are used for calculation of histogram and subsequently - discretized. Voxels outside segmentation are left unchanged. - - Fixed bin width: - - see :py:func:`getBinEdges()` - - Fixed bin Count: - - .. math:: - X_{b, i} = \lfloor N_b\frac{(X_{gl, i} - \min(X_{gl})}{\max(X_{gl})) - \min(X_{gl}} \rfloor + 1 - - Here, :math:`N_b` is the number of bins to use, as defined in ``binCount``. + using :py:func:`getBinEdges`. Only voxels defined by parameterMatrixCoordinates (defining the segmentation) are used + for calculation of histogram and subsequently discretized. Voxels outside segmentation are left unchanged. """ global logger logger.debug('Discretizing gray levels inside ROI') if parameterMatrixCoordinates is None: - if bincount is not None: - binEdges = numpy.histogram(parameterMatrix[:], bincount)[1] - else: - binEdges = getBinEdges(binwidth, parameterMatrix[:], dynamic_ref_range) + binEdges = getBinEdges(binwidth, parameterMatrix[:], bincount, dynamic_ref_range) parameterMatrix = numpy.digitize(parameterMatrix, binEdges) else: - if bincount is not None: - binEdges = numpy.histogram(parameterMatrix[parameterMatrixCoordinates], bincount)[1] - else: - binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates], dynamic_ref_range) + binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates], bincount, dynamic_ref_range) parameterMatrix[parameterMatrixCoordinates] = numpy.digitize(parameterMatrix[parameterMatrixCoordinates], binEdges) return parameterMatrix, binEdges