Skip to content

Commit

Permalink
ENH: Implement dynamic binning
Browse files Browse the repository at this point in the history
Add the possibility to enable dynamic binning, which scales the bin width by the ratio between the range of intensities seen in the derived and original images (only including intensities in the ROI). This is only done if dynamic binning is enabled (new parameter `dynamicBinning`, boolean, default false) and no custom bin width has been defined for that filter.

This also has no effect if a fixed bin count is used.

Addresses issue #49.
  • Loading branch information
JoostJM committed Aug 24, 2018
1 parent 69ccb7d commit d9d9200
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 28 deletions.
3 changes: 3 additions & 0 deletions docs/customization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ Feature Class Level
- ``binCount`` [None]: integer, > 0, specifies the number of bins to create. The width of the bin is
then determined by the range in the ROI. No definitive evidence is available on which method of discretization is
superior, we advise a fixed bin width. See more :ref:`here <radiomics_fixed_bin_width>`.
- ``dynamicBinning`` [False]: Boolean, if set to true, scales the bin width for derived images by the ratio of the range
in the image (ROI) and the range of the original image (ROI). This setting has no effect when a fixed bin count is
used, or when a custom bin width has been specified for the filter. See also :py:func:`getBinEdges()`.

*Forced 2D extraction*

Expand Down
3 changes: 2 additions & 1 deletion radiomics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ def _applyBinning(self):
self.matrix, self.binEdges = imageoperations.binImage(self.binWidth,
self.imageArray,
self.maskArray,
self.settings.get('binCount', None))
self.settings.get('binCount', None),
self.settings.get('dynamic_ref_range', None))
self.coefficients['grayLevels'] = numpy.unique(self.matrix[self.maskArray])
self.coefficients['Ng'] = int(numpy.max(self.coefficients['grayLevels'])) # max gray level in the ROI

Expand Down
9 changes: 9 additions & 0 deletions radiomics/featureextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,13 @@ def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False):
# Resegmentation successful
mask = resegmentedMask

dynamic_ref_range = None
if self.settings.get('dynamicBinning', False):
im_arr = sitk.GetArrayFromImage(image)
ma_arr = sitk.GetArrayFromImage(mask) == self.settings.get('label', 1)
target_voxel_arr = im_arr[ma_arr]
dynamic_ref_range = max(target_voxel_arr) - min(target_voxel_arr)

# 6. Calculate other enabled feature classes using enabled image types
# Make generators for all enabled image types
self.logger.debug('Creating image type iterator')
Expand All @@ -442,6 +449,8 @@ def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False):
args = self.settings.copy()
args.update(customKwargs)
self.logger.info('Adding image type "%s" with custom settings: %s' % (imageType, str(customKwargs)))
if 'binWidth' not in customKwargs and imageType != 'Original':
args['dynamic_ref_range'] = dynamic_ref_range
imageGenerators = chain(imageGenerators, getattr(imageoperations, 'get%sImage' % imageType)(image, mask, **args))

self.logger.debug('Extracting features')
Expand Down
3 changes: 2 additions & 1 deletion radiomics/firstorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def _getDiscretizedTargetVoxelArray(self):
if self.binCount is not None:
binEdges = self.binCount
else:
binEdges = imageoperations.getBinEdges(self.binWidth, self.targetVoxelArray)
binEdges = imageoperations.getBinEdges(self.binWidth, self.targetVoxelArray,
dynamic_ref_range=self.settings.get('dynamic_ref_range', None))

self.discretizedTargetVoxelArray = numpy.histogram(self.targetVoxelArray, binEdges)[0]

Expand Down
83 changes: 57 additions & 26 deletions radiomics/imageoperations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,46 @@
logger = logging.getLogger(__name__)


def getBinEdges(binwidth, parameterValues):
def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None):
r"""
Calculate and return the histogram using parameterValues (1D array of all segmented voxels in the image). Parameter
``binWidth`` determines the fixed width of each bin. This ensures comparable voxels after binning, a fixed bin count
would be dependent on the intensity range in the segmentation.
Returns the bin edges, a list of the edges of the calculated bins, length is N(bins) + 1. Bins are defined such, that
the bin edges are equally spaced from zero, and that the leftmost edge :math:`\leq \min(X_{gl})`.
the bin edges are equally spaced from zero, and that the leftmost edge :math:`\leq \min(X_{gl})`:
.. math::
X_{b, i} = \lfloor \frac{X_{gl, i}}{W} \rfloor - \lfloor \frac {\min(X_{gl})}{W} \rfloor + 1
Here, :math:`X_{gl, i}` and :math:`X_{b, i}` are gray level intensities before and after discretization, respectively.
:math:`{W}` is the bin width value (specfied in ``binWidth`` parameter). The first part of the formula ensures that
the bins are equally spaced from 0, whereas the second part ensures that the minimum gray level intensity inside the
ROI after binning is always 1.
If the range of gray level intensities is equally dividable by the binWidth, i.e. :math:`(\max(X_{gl})- \min(X_{gl}))
\mod W = 0`, the maximum intensity will be encoded as numBins + 1, therefore the maximum number of gray
level intensities in the ROI after binning is number of bins + 1.
If dynamic binning is enabled (parameter `dynamicBinning`), and no custom binwidth has been defined for the filter,
the actual bin width used (:math:`W_{dyn}`) is defined as:
.. math::
W_{dyn} = W * \frac{\max(X_{der})) - \min(X_{der}}{\max(X_{ref})) - \min(X_{ref}}
Here, :math:`X_{der}` and :math:`X_{ref}` represent the intensities found in the ROI on the derived and original
images, respectively.
.. warning::
This is different from the assignment of voxels to the bins by ``numpy.histogram`` , which has half-open bins, with
the exception of the rightmost bin, which means this maximum values are assigned to the topmost bin.
``numpy.digitize`` uses half-open bins, including the rightmost bin.
.. note::
This method is slightly different from the fixed bin size discretization method described by IBSI. The two most
notable differences are 1) that PyRadiomics uses a floor division (and adds 1), as opposed to a ceiling division and
2) that in PyRadiomics, bins are always equally spaced from 0, as opposed to equally spaced from the minimum
gray level intensity.
*Example: for a ROI with values ranging from 54 to 166, and a bin width of 25, the bin edges will be [50, 75, 100,
125, 150, 175].*
Expand All @@ -33,10 +65,19 @@ def getBinEdges(binwidth, parameterValues):
"""
global logger

minimum = min(parameterValues)
maximum = max(parameterValues)

if dynamic_ref_range is not None and dynamic_ref_range > 0 and minimum < maximum:
range_scale = (maximum - minimum) / dynamic_ref_range
binwidth = binwidth * range_scale
logger.debug('Applied dynamic binning (reference range %g, current range %g), scaled bin width to %g',
dynamic_ref_range, maximum - minimum, binwidth)

# Start binning form the first value lesser than or equal to the minimum value and evenly dividable by binwidth
lowBound = min(parameterValues) - (min(parameterValues) % binwidth)
lowBound = minimum - (minimum % binwidth)
# Add + binwidth to ensure the maximum value is included in the range generated by numpu.arange
highBound = max(parameterValues) + binwidth
highBound = maximum + binwidth

binEdges = numpy.arange(lowBound, highBound, binwidth)

Expand All @@ -52,33 +93,23 @@ def getBinEdges(binwidth, parameterValues):
return binEdges # numpy.histogram(parameterValues, bins=binedges)


def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincount=None):
def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincount=None, dynamic_ref_range=None):
r"""
Discretizes the parameterMatrix (matrix representation of the gray levels in the ROI) using the binEdges calculated
using :py:func:`getBinEdges`. Only voxels defined by parameterMatrixCoordinates (defining the segmentation) are used
for calculation of histogram and subsequently discretized. Voxels outside segmentation are left unchanged.
using :py:func:`getBinEdges` or the number of bins specified in ``binCount``. Only voxels defined by
parameterMatrixCoordinates (defining the segmentation) are used for calculation of histogram and subsequently
discretized. Voxels outside segmentation are left unchanged.
:math:`X_{b, i} = \lfloor \frac{X_{gl, i}}{W} \rfloor - \lfloor \frac {\min(X_{gl})}{W} \rfloor + 1`
Fixed bin width:
Here, :math:`X_{gl, i}` and :math:`X_{b, i}` are gray level intensities before and after discretization, respectively.
:math:`{W}` is the bin width value (specfied in ``binWidth`` parameter). The first part of the formula ensures that
the bins are equally spaced from 0, whereas the second part ensures that the minimum gray level intensity inside the
ROI after binning is always 1.
see :py:func:`getBinEdges()`
If the range of gray level intensities is equally dividable by the binWidth, i.e. :math:`(\max(X_{gl})- \min(X_{gl}))
\mod W = 0`, the maximum intensity will be encoded as numBins + 1, therefore the maximum number of gray
level intensities in the ROI after binning is number of bins + 1.
Fixed bin Count:
.. warning::
This is different from the assignment of voxels to the bins by ``numpy.histogram`` , which has half-open bins, with
the exception of the rightmost bin, which means this maximum values are assigned to the topmost bin.
``numpy.digitize`` uses half-open bins, including the rightmost bin.
.. math::
X_{b, i} = \lfloor N_b\frac{(X_{gl, i} - \min(X_{gl})}{\max(X_{gl})) - \min(X_{gl}} \rfloor + 1
.. note::
This method is slightly different from the fixed bin size discretization method described by IBSI. The two most
notable differences are 1) that PyRadiomics uses a floor division (and adds 1), as opposed to a ceiling division and
2) that in PyRadiomics, bins are always equally spaced from 0, as opposed to equally spaced from the minimum
gray level intensity.
Here, :math:`N_b` is the number of bins to use, as defined in ``binCount``.
"""
global logger
logger.debug('Discretizing gray levels inside ROI')
Expand All @@ -87,13 +118,13 @@ def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincoun
if bincount is not None:
binEdges = numpy.histogram(parameterMatrix[:], bincount)[1]
else:
binEdges = getBinEdges(binwidth, parameterMatrix[:])
binEdges = getBinEdges(binwidth, parameterMatrix[:], dynamic_ref_range)
parameterMatrix = numpy.digitize(parameterMatrix, binEdges)
else:
if bincount is not None:
binEdges = numpy.histogram(parameterMatrix[parameterMatrixCoordinates], bincount)[1]
else:
binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates])
binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates], dynamic_ref_range)
parameterMatrix[parameterMatrixCoordinates] = numpy.digitize(parameterMatrix[parameterMatrixCoordinates], binEdges)

return parameterMatrix, binEdges
Expand Down
2 changes: 2 additions & 0 deletions radiomics/schemas/paramSchema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ mapping:
type: int
range:
min-ex: 0
dynamicBinning:
type: bool
normalize:
type: bool
normalizeScale:
Expand Down

0 comments on commit d9d9200

Please sign in to comment.