Skip to content

Commit

Permalink
ENH: Move application of bin count to getBinEdges
Browse files Browse the repository at this point in the history
Additionally, add +1 to uppermost bin edge to ensure it gets discretized to the topmost bin in numpy.digitize (instead of topmost + 1).
  • Loading branch information
JoostJM committed Sep 12, 2018
1 parent 3bac9dd commit be069ac
Showing 1 changed file with 42 additions and 46 deletions.
88 changes: 42 additions & 46 deletions radiomics/imageoperations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
logger = logging.getLogger(__name__)


def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None):
def getBinEdges(binwidth, parameterValues, binCount=None, dynamic_ref_range=None):
r"""
Calculate and return the histogram using parameterValues (1D array of all segmented voxels in the image). Parameter
``binWidth`` determines the fixed width of each bin. This ensures comparable voxels after binning, a fixed bin count
would be dependent on the intensity range in the segmentation.
Calculate and return the histogram using parameterValues (1D array of all segmented voxels in the image).
**Fixed bin width:**
Returns the bin edges, a list of the edges of the calculated bins, length is N(bins) + 1. Bins are defined such, that
the bin edges are equally spaced from zero, and that the leftmost edge :math:`\leq \min(X_{gl})`:
Expand All @@ -36,7 +36,7 @@ def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None):
the actual bin width used (:math:`W_{dyn}`) is defined as:
.. math::
W_{dyn} = W * \frac{\max(X_{der})) - \min(X_{der}}{\max(X_{ref})) - \min(X_{ref}}
W_{dyn} = W * \frac{\max(X_{der}) - \min(X_{der})}{\max(X_{ref}) - \min(X_{ref})}
Here, :math:`X_{der}` and :math:`X_{ref}` represent the intensities found in the ROI on the derived and original
images, respectively.
Expand All @@ -58,73 +58,69 @@ def getBinEdges(binwidth, parameterValues, dynamic_ref_range=None):
This value can be directly passed to ``numpy.histogram`` to generate a histogram or ``numpy.digitize`` to discretize
the ROI gray values. See also :py:func:`binImage()`.
**Fixed bin Count:**
.. math::
X_{b, i} = \left\{ {\begin{array}{lcl}
\lfloor N_b\frac{(X_{gl, i} - \min(X_{gl})}{\max(X_{gl}) - \min(X_{gl})} \rfloor + 1 &
\mbox{for} & X_{gl, i} < \max(X_{gl}) \\
N_b & \mbox{for} & X_{gl, i} = \max(X_{gl}) \end{array}} \right.
Here, :math:`N_b` is the number of bins to use, as defined in ``binCount``.
References
- Leijenaar RTH, Nalbantov G, Carvalho S, et al. The effect of SUV discretization in quantitative FDG-PET Radiomics:
the need for standardized methodology in tumor texture analysis. Sci Rep. 2015;5(August):11075.
"""
global logger

minimum = min(parameterValues)
maximum = max(parameterValues)
if binCount is not None:
binEdges = numpy.histogram(parameterValues, binCount)[1]
binEdges[-1] += 1 # Ensures that the maximum value is included in the topmost bin when using numpy.digitize
else:
minimum = min(parameterValues)
maximum = max(parameterValues)

if dynamic_ref_range is not None and dynamic_ref_range > 0 and minimum < maximum:
range_scale = (maximum - minimum) / dynamic_ref_range
binwidth = binwidth * range_scale
logger.debug('Applied dynamic binning (reference range %g, current range %g), scaled bin width to %g',
dynamic_ref_range, maximum - minimum, binwidth)
if dynamic_ref_range is not None and dynamic_ref_range > 0 and minimum < maximum:
range_scale = (maximum - minimum) / dynamic_ref_range
binwidth = binwidth * range_scale
logger.debug('Applied dynamic binning (reference range %g, current range %g), scaled bin width to %g',
dynamic_ref_range, maximum - minimum, binwidth)

# Start binning form the first value lesser than or equal to the minimum value and evenly dividable by binwidth
lowBound = minimum - (minimum % binwidth)
# Add + binwidth to ensure the maximum value is included in the range generated by numpu.arange
highBound = maximum + binwidth
# Start binning form the first value lesser than or equal to the minimum value and evenly dividable by binwidth
lowBound = minimum - (minimum % binwidth)
# Add + binwidth to ensure the maximum value is included in the range generated by numpy.arange
highBound = maximum + binwidth

binEdges = numpy.arange(lowBound, highBound, binwidth)
binEdges = numpy.arange(lowBound, highBound, binwidth)

# if min(parameterValues) % binWidth = 0 and min(parameterValues) = max(parameterValues), binEdges will only contain
# 1 value. If this is the case (flat region) ensure that numpy.histogram creates 1 bin (requires 2 edges). For
# numpy.histogram, a binCount (1) would also suffice, however, this is not accepted by numpy.digitize, which also uses
# binEdges calculated by this function.
if len(binEdges) == 1: # Flat region, ensure that there is 1 bin
binEdges = [binEdges[0] - .5, binEdges[0] + .5] # Simulates binEdges returned by numpy.histogram if bins = 1
# if min(parameterValues) % binWidth = 0 and min(parameterValues) = max(parameterValues), binEdges will only contain
# 1 value. If this is the case (flat region) ensure that numpy.histogram creates 1 bin (requires 2 edges). For
# numpy.histogram, a binCount (1) would also suffice, however, this is not accepted by numpy.digitize, which also uses
# binEdges calculated by this function.
if len(binEdges) == 1: # Flat region, ensure that there is 1 bin
binEdges = [binEdges[0] - .5, binEdges[0] + .5] # Simulates binEdges returned by numpy.histogram if bins = 1

logger.debug('Calculated %d bins for bin width %g with edges: %s)', len(binEdges) - 1, binwidth, binEdges)
logger.debug('Calculated %d bins for bin width %g with edges: %s)', len(binEdges) - 1, binwidth, binEdges)

return binEdges # numpy.histogram(parameterValues, bins=binedges)


def binImage(binwidth, parameterMatrix, parameterMatrixCoordinates=None, bincount=None, dynamic_ref_range=None):
r"""
Discretizes the parameterMatrix (matrix representation of the gray levels in the ROI) using the binEdges calculated
using :py:func:`getBinEdges` or the number of bins specified in ``binCount``. Only voxels defined by
parameterMatrixCoordinates (defining the segmentation) are used for calculation of histogram and subsequently
discretized. Voxels outside segmentation are left unchanged.
Fixed bin width:
see :py:func:`getBinEdges()`
Fixed bin Count:
.. math::
X_{b, i} = \lfloor N_b\frac{(X_{gl, i} - \min(X_{gl})}{\max(X_{gl})) - \min(X_{gl}} \rfloor + 1
Here, :math:`N_b` is the number of bins to use, as defined in ``binCount``.
using :py:func:`getBinEdges`. Only voxels defined by parameterMatrixCoordinates (defining the segmentation) are used
for calculation of histogram and subsequently discretized. Voxels outside segmentation are left unchanged.
"""
global logger
logger.debug('Discretizing gray levels inside ROI')

if parameterMatrixCoordinates is None:
if bincount is not None:
binEdges = numpy.histogram(parameterMatrix[:], bincount)[1]
else:
binEdges = getBinEdges(binwidth, parameterMatrix[:], dynamic_ref_range)
binEdges = getBinEdges(binwidth, parameterMatrix[:], bincount, dynamic_ref_range)
parameterMatrix = numpy.digitize(parameterMatrix, binEdges)
else:
if bincount is not None:
binEdges = numpy.histogram(parameterMatrix[parameterMatrixCoordinates], bincount)[1]
else:
binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates], dynamic_ref_range)
binEdges = getBinEdges(binwidth, parameterMatrix[parameterMatrixCoordinates], bincount, dynamic_ref_range)
parameterMatrix[parameterMatrixCoordinates] = numpy.digitize(parameterMatrix[parameterMatrixCoordinates], binEdges)

return parameterMatrix, binEdges
Expand Down

0 comments on commit be069ac

Please sign in to comment.