Skip to content

Commit

Permalink
Revised binning kind arguments. Added 'equal area' binning and 'custo…
Browse files Browse the repository at this point in the history
…m' bin options
  • Loading branch information
erikmannerfelt committed Mar 15, 2021
1 parent ecdfdea commit 3b33d4b
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 18 deletions.
49 changes: 47 additions & 2 deletions tests/test_volume.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import warnings

import geoutils as gu
import numpy as np
import pandas as pd

import xdem
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import xdem

xdem.examples.download_longyearbyen_examples(overwrite=False)

Expand All @@ -23,7 +27,11 @@ def test_bin_ddem(self):
"""Test dDEM binning."""
ddem = self.dem_2009.data - self.dem_1990.data

ddem_bins = xdem.volume.hypsometric_binning(ddem.squeeze()[self.mask], self.dem_2009.data.squeeze()[self.mask])
ddem_bins = xdem.volume.hypsometric_binning(
ddem.squeeze()[self.mask],
self.dem_2009.data.squeeze()[self.mask],
bins=50,
kind="equal_height")

ddem_bins_masked = xdem.volume.hypsometric_binning(
np.ma.masked_array(ddem.squeeze(), mask=~self.mask),
Expand Down Expand Up @@ -116,3 +124,40 @@ def test_area_calculation(self):

# The area of Scott Turnerbreen was around 3.4 km² in 1990, so this should be close to that number.
assert 2e6 < bin_area.sum() < 5e6

def test_ddem_bin_methods(self):
"""Test different dDEM binning methods."""
ddem = self.dem_2009.data - self.dem_1990.data

# equal height is already tested in test_bin_ddem

# Make a fixed amount of bins
equal_count_bins = xdem.volume.hypsometric_binning(
ddem.squeeze()[self.mask],
self.dem_2009.data.squeeze()[self.mask],
bins=50,
kind="equal_count"
)
assert equal_count_bins.shape[0] == 50

# Make 50 bins with approximately the same area (pixel count)
equal_area_bins = xdem.volume.hypsometric_binning(
ddem.squeeze()[self.mask],
self.dem_2009.data.squeeze()[self.mask],
bins=50,
kind="equal_area"
)

assert equal_area_bins.shape[0] == 50
# Make sure that the pixel count variation is low.
assert equal_area_bins["count"].std() < 1

# Try to feed the previous bins as "custom" bins to the function.
custom_bins = xdem.volume.hypsometric_binning(
ddem.squeeze()[self.mask],
self.dem_2009.data.squeeze()[self.mask],
bins=np.r_[equal_area_bins.index.left[0], equal_area_bins.index.right],
kind="custom"
)

assert custom_bins.shape[0] == equal_area_bins.shape[0]
45 changes: 29 additions & 16 deletions xdem/volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,23 @@
import scipy.interpolate


def hypsometric_binning(ddem: np.ndarray, ref_dem: np.ndarray, bin_size=50,
normalized_bin_size: bool = False, aggregation_function: Callable = np.median) -> pd.DataFrame:
def hypsometric_binning(ddem: np.ndarray, ref_dem: np.ndarray, bins: Union[float, np.ndarray] = 50.0,
kind: str = "equal_height", aggregation_function: Callable = np.median) -> pd.DataFrame:
"""
Separate the dDEM in discrete elevation bins.
It is assumed that the dDEM is calculated as 'ref_dem - dem' (not 'dem - ref_dem').
:param ddem: The dDEM as a 2D or 1D array.
:param ref_dem: The reference DEM as a 2D or 1D array.
:param bin_size: The bin interval size in georeferenced units (or percent; 0-100, if normalized_bin_size=True)
:param normalized_bin_size: If the given bin size should be parsed as a percentage of the glacier's elevation range.
:param bins: The bin size, count, or array, depending on the binning method ('kind').
:param kind: The kind of binning to do. Choices: ['equal_height', 'equal_count', 'equal_area', 'custom'].
:param aggregation_function: The function to aggregate the elevation values within a bin. Defaults to the median.
:returns: A Pandas DataFrame with elevation bins and dDEM statistics.
"""
assert ddem.shape == ref_dem.shape

# Remove all nans, and flatten the inputs.
nan_mask = np.logical_or(
np.isnan(ddem) if not isinstance(ddem, np.ma.masked_array) else ddem.mask,
Expand All @@ -37,21 +38,33 @@ def hypsometric_binning(ddem: np.ndarray, ref_dem: np.ndarray, bin_size=50,
mean_dem = ref_dem - (ddem / 2)

# If the bin size should be seen as a percentage.
if normalized_bin_size:
assert 0 < bin_size < 100

# Get the statistical elevation range to normalize the bin size with
elevation_range = np.percentile(mean_dem, 99) - np.percentile(mean_dem, 1)
bin_size = elevation_range / bin_size
if kind == "equal_height":
zbins = np.arange(mean_dem.min(), mean_dem.max() + bins, step=bins)
elif kind == "equal_count":
# Make bins between mean_dem.min() and a little bit above mean_dem.max().
# The bin count has to be bins + 1 because zbins[0] will be a "below min value" bin, which will be irrelevant.
zbins = np.linspace(mean_dem.min(), mean_dem.max() + 1e-6 / bins, num=int(bins + 1))
elif kind == "equal_area":
# Make the percentile steps. The bins + 1 is explained above.
steps = np.linspace(0, 100, num=int(bins) + 1)
zbins = np.fromiter(
(np.percentile(mean_dem, step) for step in steps),
dtype=float
)
# The uppermost bin needs to be a tiny amount larger than the highest value to include it.
zbins[-1] += 1e-6
elif kind == "custom":
zbins = bins
else:
raise ValueError(f"Invalid bin kind: {kind}. Choices: ['equal_size', 'equal_count', 'equal_area', 'custom']")

# Generate bins and get bin indices from the mean DEM
bins = np.arange(mean_dem.min(), mean_dem.max() + bin_size, step=bin_size)
indices = np.digitize(mean_dem, bins=bins)
indices = np.digitize(mean_dem, bins=zbins)

# Calculate statistics for each bin.
# If no values exist, all stats should be nans (except count with should be 0)
# medians, means, stds, nmads = (np.zeros(shape=bins.shape[0] - 1, dtype=ddem.dtype) * np.nan, ) * 4
values = np.zeros(shape=bins.shape[0] - 1, dtype=ddem.dtype) * np.nan
values = np.zeros(shape=zbins.shape[0] - 1, dtype=ddem.dtype) * np.nan
counts = np.zeros_like(values, dtype=int)
for i in np.arange(indices.min(), indices.max() + 1):
values_in_bin = ddem[indices == i]
Expand All @@ -68,7 +81,7 @@ def hypsometric_binning(ddem: np.ndarray, ref_dem: np.ndarray, bin_size=50,

# Collect the results in a dataframe
output = pd.DataFrame(
index=pd.IntervalIndex.from_breaks(bins),
index=pd.IntervalIndex.from_breaks(zbins),
data=np.vstack([
values, counts
]).T,
Expand Down Expand Up @@ -142,11 +155,11 @@ def calculate_hypsometry_area(ddem_bins: Union[pd.Series, pd.DataFrame], ref_dem
kind="linear", fill_value="extrapolate")
# Generate average elevations by subtracting half of the dDEM's values to the reference DEM
if timeframe == "mean":
elevations = ref_dem - (ddem_func(ref_dem) / 2)
elevations = ref_dem - (ddem_func(ref_dem.data) / 2)
elif timeframe == "reference":
elevations = ref_dem
elif timeframe == "nonreference":
elevations = ref_dem - ddem_func(ref_dem)
elevations = ref_dem - ddem_func(ref_dem.data)

# Extract the bins from the dDEM series and compute the frequency of points in the bins.
bins = np.r_[[ddem_bins.index.left[0]], ddem_bins.index.right]
Expand Down

0 comments on commit 3b33d4b

Please sign in to comment.