Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add robust polynomial, sum of sinusoids fitting #151

Merged
merged 39 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
f7f6108
Merge pull request #6 from GlacioHack/main
rhugonnet Mar 27, 2021
9c97f54
Merge branch 'GlacioHack:main' into main
rhugonnet Jun 17, 2021
4c4bfbf
add robust polynomial fit + tests
rhugonnet Jun 22, 2021
54872f8
add array dimension to docs
rhugonnet Jun 22, 2021
bafec5e
streamline test polynomial fit
rhugonnet Jun 22, 2021
204434a
fix polynomial fit tests
rhugonnet Jun 22, 2021
78e2ef7
import scikit-learn as optional dependency
rhugonnet Jun 22, 2021
eb397fa
Merge branch 'main' into angle_binning
rhugonnet Jun 22, 2021
20e8e5e
use new subsample function + small fixes
rhugonnet Jun 22, 2021
3c4ea4a
fix test
rhugonnet Jun 22, 2021
6abe090
add comments
rhugonnet Jun 22, 2021
e6034ea
fixes with Eriks comments
rhugonnet Jun 24, 2021
86d565b
improve tests with Erik comments
rhugonnet Jun 24, 2021
54d4a22
fix test
rhugonnet Jun 24, 2021
cc9398f
add draft for robust scaling using ML methods
rhugonnet Jun 24, 2021
6335fe7
draft robust sum of sin basinhopping
rhugonnet Jun 25, 2021
46f2c08
move nd binning to spatial_tools
rhugonnet Jun 25, 2021
549a734
finish basinhopping for sumfit
rhugonnet Jun 25, 2021
8e589e7
add tests for sum of sins
rhugonnet Jun 25, 2021
c19953b
move tests for nd binning
rhugonnet Jun 25, 2021
1043845
fix typing error
rhugonnet Jun 25, 2021
268e63a
fix tests
rhugonnet Jun 25, 2021
6e7cf23
Merge remote-tracking branch 'upstream/main' into angle_binning
rhugonnet Sep 6, 2021
5cfe2e7
rewrite tests with pytest.approx
rhugonnet Sep 6, 2021
1ca8c4f
use np.polyval instead of writing out the polynomial
rhugonnet Sep 6, 2021
f060cf9
rest of amaury comments
rhugonnet Sep 6, 2021
616bd7e
add fit module, refactor nmad into spatialstats
rhugonnet Sep 6, 2021
700bf85
fix tests
rhugonnet Sep 6, 2021
a2d4acf
finish refactor nmad, fix tests
rhugonnet Sep 7, 2021
dba11f4
increase error margin of test
rhugonnet Sep 7, 2021
28e96ed
try fixing test
rhugonnet Sep 7, 2021
aaf818f
add print statement to check values in CI
rhugonnet Sep 7, 2021
317c58c
move print statement to the right place
rhugonnet Sep 7, 2021
3051384
streamline comments
rhugonnet Sep 7, 2021
9dc9d5f
further streamline comments
rhugonnet Sep 7, 2021
5387422
remove print statement
rhugonnet Sep 7, 2021
917d905
subdivide scipy and sklearn into wrapper functions for reuse and clarity
rhugonnet Sep 7, 2021
6588c01
skip randomly failing test
rhugonnet Sep 7, 2021
0d2e7ec
fix skip syntax
rhugonnet Sep 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 207 additions & 0 deletions tests/test_spatial_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

Author(s):
Erik S. Holmlund
rhugonnet marked this conversation as resolved.
Show resolved Hide resolved
Romain Hugonnet

"""
from __future__ import annotations

import os
import shutil
import subprocess
Expand All @@ -12,8 +15,10 @@

import geoutils as gu
import numpy as np
import pandas as pd
import pytest
import rasterio as rio
from sklearn.metrics import mean_squared_error, median_absolute_error

import xdem
from xdem import examples
Expand All @@ -28,6 +33,31 @@ def test_dem_subtraction():
assert np.nanmean(np.abs(diff.data)) < 100


def load_ref_and_diff() -> tuple[gu.georaster.Raster, gu.georaster.Raster, np.ndarray]:
    """
    Load the Longyearbyen example DEMs, coregister them with the Nuth & Kaab (2011)
    method, and return data to test statistics tools with.

    :returns: Tuple of (reference DEM raster, post-coregistration elevation
        difference raster, glacier mask array).
    """
    examples.download_longyearbyen_examples(overwrite=False)

    reference_raster = gu.georaster.Raster(examples.FILEPATHS["longyearbyen_ref_dem"])
    to_be_aligned_raster = gu.georaster.Raster(examples.FILEPATHS["longyearbyen_tba_dem"])
    glacier_mask = gu.geovector.Vector(examples.FILEPATHS["longyearbyen_glacier_outlines"])
    # Coregister on stable terrain only: exclude glacierized areas
    inlier_mask = ~glacier_mask.create_mask(reference_raster)

    nuth_kaab = xdem.coreg.NuthKaab()
    nuth_kaab.fit(reference_raster.data, to_be_aligned_raster.data,
                  inlier_mask=inlier_mask, transform=reference_raster.transform)
    aligned_raster = nuth_kaab.apply(to_be_aligned_raster.data, transform=reference_raster.transform)

    # Elevation difference after coregistration, as a raster on the reference grid
    diff = gu.Raster.from_array((reference_raster.data - aligned_raster),
                                transform=reference_raster.transform, crs=reference_raster.crs)
    mask = glacier_mask.create_mask(diff)

    return reference_raster, diff, mask



class TestMerging:
"""
Test cases for stacking and merging DEMs
Expand Down Expand Up @@ -178,6 +208,136 @@ def make_gdal_hillshade(filepath) -> np.ndarray:
# Make sure that this doesn't create weird division warnings.
xdem.spatial_tools.hillshade(dem.data, dem.res)

class TestRobustFitting:
    """Test cases for robust fitting of polynomials and sums of sinusoids."""

    @pytest.mark.parametrize("pkg_estimator", [('sklearn', 'Linear'), ('scipy', 'Linear'), ('sklearn', 'Theil-Sen'),
                                               ('sklearn', 'RANSAC'), ('sklearn', 'Huber')])
    def test_robust_polynomial_fit(self, pkg_estimator: tuple[str, str]) -> None:
        """Every package/estimator combination should recover a noise-free degree-3 polynomial."""

        np.random.seed(42)

        # x vector
        x = np.linspace(1, 10, 1000)
        # exact polynomial
        true_coefs = [-100, 5, 3, 2]
        y = true_coefs[0] + true_coefs[1] * x + true_coefs[2] * x ** 2 + true_coefs[3] * x ** 3

        coefs, deg = xdem.spatial_tools.robust_polynomial_fit(x, y, linear_pkg=pkg_estimator[0],
                                                              estimator=pkg_estimator[1], random_state=42)

        # Degree 3 can be found exactly, or degree 4 with a near-zero highest-order coefficient
        assert deg == 3 or deg == 4
        assert np.abs(coefs[0] - true_coefs[0]) <= 100
        assert np.abs(coefs[1] - true_coefs[1]) < 5
        assert np.abs(coefs[2] - true_coefs[2]) < 2
        assert np.abs(coefs[3] - true_coefs[3]) < 1

    def test_robust_polynomial_fit_noise_and_outliers(self) -> None:
        """Compare the robustness of the different estimators to added noise and outliers."""

        np.random.seed(42)

        # x vector
        x = np.linspace(1, 10, 1000)
        # exact polynomial
        true_coefs = [-100, 5, 3, 2]
        y = true_coefs[0] + true_coefs[1] * x + true_coefs[2] * x**2 + true_coefs[3] * x**3
        # add some noise on top
        y += np.random.normal(loc=0, scale=3, size=1000)
        # and some outliers
        y[50:75] = 0
        y[900:925] = 1000

        # test linear estimators
        coefs, deg = xdem.spatial_tools.robust_polynomial_fit(x, y, estimator='Linear', linear_pkg='scipy',
                                                              loss='soft_l1', f_scale=0.5)

        # scipy solution should be quite robust to outliers/noise (with the soft_l1 method and f_scale parameter)
        # however, it is subject to random processes inside the scipy function (couldn't find how to fix those...)
        assert deg == 3 or deg == 4  # can find degree 3, or 4 with coefficient close to 0
        assert np.abs(coefs[0] - true_coefs[0]) < 3
        assert np.abs(coefs[1] - true_coefs[1]) < 3
        assert np.abs(coefs[2] - true_coefs[2]) < 1
        assert np.abs(coefs[3] - true_coefs[3]) < 1

        # the sklearn Linear solution with MSE cost function will not be robust
        coefs2, deg2 = xdem.spatial_tools.robust_polynomial_fit(x, y, estimator='Linear', linear_pkg='sklearn',
                                                                cost_func=mean_squared_error, margin_improvement=50)
        assert deg2 != 3
        # using the median absolute error should improve the fit, but the parameters will still be hard to constrain
        coefs3, deg3 = xdem.spatial_tools.robust_polynomial_fit(x, y, estimator='Linear', linear_pkg='sklearn',
                                                                cost_func=median_absolute_error, margin_improvement=50)
        assert deg3 == 3
        assert np.abs(coefs3[0] - true_coefs[0]) > 50
        assert np.abs(coefs3[1] - true_coefs[1]) > 10
        assert np.abs(coefs3[2] - true_coefs[2]) > 5
        assert np.abs(coefs3[3] - true_coefs[3]) > 0.5

        # test robust estimators

        # Theil-Sen should have better coefficients
        coefs4, deg4 = xdem.spatial_tools.robust_polynomial_fit(x, y, estimator='Theil-Sen', random_state=42)
        assert deg4 == 3
        # high degree coefficients should be well constrained
        assert np.abs(coefs4[2] - true_coefs[2]) < 1
        assert np.abs(coefs4[3] - true_coefs[3]) < 1

        # RANSAC is not always optimal, here it does not work well
        coefs5, deg5 = xdem.spatial_tools.robust_polynomial_fit(x, y, estimator='RANSAC', random_state=42)
        assert deg5 != 3

        # Huber should perform well, close to the scipy robust solution
        coefs6, deg6 = xdem.spatial_tools.robust_polynomial_fit(x, y, estimator='Huber')
        assert deg6 == 3
        assert np.abs(coefs6[1] - true_coefs[1]) < 1
        assert np.abs(coefs6[2] - true_coefs[2]) < 1
        assert np.abs(coefs6[3] - true_coefs[3]) < 1

    def test_robust_sumsin_fit(self) -> None:
        """A noise-free sum of two sinusoids should be recovered almost exactly."""

        # x vector
        x = np.linspace(0, 10, 1000)
        # exact sum of sinusoids: (amplitude, frequency, phase) triplets for two frequencies
        true_coefs = np.array([(5, 1, np.pi), (3, 0.3, 0)]).flatten()
        y = xdem.spatial_tools._fitfun_sumofsin(x, params=true_coefs)

        # check that the function runs
        coefs, deg = xdem.spatial_tools.robust_sumsin_fit(x, y, random_state=42)

        # check that the estimated amplitudes correspond to the input
        for i in range(2):
            # absolute error: a signed difference would also pass for an arbitrarily large negative error
            assert np.abs(coefs[3 * i] - true_coefs[3 * i]) < 0.01

        # test that using custom arguments does not create an error
        bounds = [(3, 7), (0.1, 3), (0, 2*np.pi), (1, 7), (0.1, 1), (0, 2*np.pi), (0, 1), (0.1, 1), (0, 2*np.pi)]
        coefs, deg = xdem.spatial_tools.robust_sumsin_fit(x, y, bounds_amp_freq_phase=bounds, nb_frequency_max=2,
                                                          significant_res=0.01, random_state=42)

    def test_robust_sumsin_fit_noise_and_outliers(self) -> None:
        """The sum-of-sinusoid fit should remain close to the truth with noise and outliers added."""

        # test the robustness to outliers

        np.random.seed(42)
        # x vector
        x = np.linspace(0, 10, 1000)
        # exact sum of sinusoids: (amplitude, frequency, phase) triplets for two frequencies
        true_coefs = np.array([(5, 1, np.pi), (3, 0.3, 0)]).flatten()
        y = xdem.spatial_tools._fitfun_sumofsin(x, params=true_coefs)

        # adding some noise
        y += np.random.normal(loc=0, scale=0.25, size=1000)
        # and some outliers
        y[50:75] = -10
        y[900:925] = 10

        bounds = [(3, 7), (0.1, 3), (0, 2 * np.pi), (1, 7), (0.1, 1), (0, 2 * np.pi), (0, 1), (0.1, 1), (0, 2 * np.pi)]
        coefs, deg = xdem.spatial_tools.robust_sumsin_fit(x, y, random_state=42, bounds_amp_freq_phase=bounds)

        # should be less precise, but still on point
        # re-order output coefficients to match the input ordering
        if coefs[3] > coefs[0]:
            coefs = np.concatenate((coefs[3:], coefs[0:3]))

        # check values with absolute errors (a signed difference would pass for large negative errors)
        for i in range(2):
            assert np.abs(coefs[3 * i] - true_coefs[3 * i]) < 0.2
            assert np.abs(coefs[3 * i + 1] - true_coefs[3 * i + 1]) < 0.2
            # for the phase, account for the 2-pi periodicity when computing the error
            error_phase = min(np.abs(coefs[3 * i + 2] - true_coefs[3 * i + 2]),
                              np.abs(2 * np.pi - (coefs[3 * i + 2] - true_coefs[3 * i + 2])))
            assert error_phase < 0.2

class TestSubsample:
"""
Expand Down Expand Up @@ -233,3 +393,50 @@ def test_subsample(self, array):
assert len(indices) == np.ndim(array)
assert np.ndim(array[indices]) == 1
assert np.size(array[indices]) == int(np.size(array) * 0.3)


class TestBinning:
    """Tests for the N-dimensional binning of samples."""

    def test_nd_binning(self) -> None:
        """Check the dataframe returned by nd_binning for one, two and three binning variables."""

        ref, diff, mask = load_ref_and_diff()

        slope, aspect = xdem.coreg.calculate_slope_and_aspect(ref.data.squeeze())

        # Flattened elevation differences, shared by all the calls below
        dh = diff.data.flatten()

        # A single binning variable: 10 bins are created by default
        df = xdem.spatial_tools.nd_binning(values=dh, list_var=[slope.flatten()], list_var_names=['slope'])

        # One row per bin
        assert df.shape[0] == 10
        # The outermost bin edges coincide with the extrema of the binning variable
        assert np.nanmin(slope) == np.min(pd.IntervalIndex(df.slope).left)
        assert np.nanmax(slope) == np.max(pd.IntervalIndex(df.slope).right)

        # A single binning variable with an explicit bin count of 20
        df = xdem.spatial_tools.nd_binning(values=dh, list_var=[slope.flatten()], list_var_names=['slope'],
                                           list_var_bins=[[20]])
        assert df.shape[0] == 20

        # nmad goes up quite a bit with slope, we can expect a 10 m measurement error difference
        assert df.nmad.values[-1] - df.nmad.values[0] > 10

        # A user-provided statistic must also be accepted
        def percentile_80(a):
            return np.nanpercentile(a, 80)

        xdem.spatial_tools.nd_binning(values=dh, list_var=[slope.flatten()], list_var_names=['slope'],
                                      statistics=['count', percentile_80])

        # Two binning variables: two 1D binnings of 10 rows plus one 2D binning of 100 rows
        df = xdem.spatial_tools.nd_binning(values=dh, list_var=[slope.flatten(), ref.data.flatten()],
                                           list_var_names=['slope', 'elevation'])
        assert df.shape[0] == (10 + 10 + 100)

        # Three binning variables: three 1D (10 rows), three 2D (100 rows) and one 3D (1000 rows) binning
        df = xdem.spatial_tools.nd_binning(values=dh,
                                           list_var=[slope.flatten(), ref.data.flatten(), aspect.flatten()],
                                           list_var_names=['slope', 'elevation', 'aspect'])
        assert df.shape[0] == (1000 + 3 * 100 + 3 * 10)

50 changes: 1 addition & 49 deletions tests/test_spstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

"""
from __future__ import annotations
from typing import Tuple

import warnings

import geoutils as gu
Expand Down Expand Up @@ -224,50 +222,4 @@ def test_patches_method(self):
mask=~mask.astype(bool).squeeze(),
gsd=diff.res[0],
area_size=10000
)

# NOTE(review): this is the deletion side of the diff — the same class is moved to
# tests/test_spatial_tools.py with xdem.spstats.nd_binning renamed to xdem.spatial_tools.nd_binning.
class TestBinning:
    """Tests for the N-dimensional binning of samples (old location, being removed)."""

    def test_nd_binning(self):
        # Load the reference DEM, coregistered elevation difference and glacier mask
        ref, diff, mask = load_ref_and_diff()

        slope, aspect = xdem.coreg.calculate_slope_and_aspect(ref.data.squeeze())

        # 1d binning, by default will create 10 bins
        df = xdem.spstats.nd_binning(values=diff.data.flatten(),list_var=[slope.flatten()],list_var_names=['slope'])

        # check length matches
        assert df.shape[0] == 10
        # check bin edges match the minimum and maximum of binning variable
        assert np.nanmin(slope) == np.min(pd.IntervalIndex(df.slope).left)
        assert np.nanmax(slope) == np.max(pd.IntervalIndex(df.slope).right)

        # 1d binning with 20 bins
        df = xdem.spstats.nd_binning(values=diff.data.flatten(), list_var=[slope.flatten()], list_var_names=['slope'],
                                     list_var_bins=[[20]])
        # check length matches
        assert df.shape[0] == 20

        # nmad goes up quite a bit with slope, we can expect a 10 m measurement error difference
        assert df.nmad.values[-1] - df.nmad.values[0] > 10

        # try custom stat
        def percentile_80(a):
            return np.nanpercentile(a, 80)

        # check the function runs with custom functions
        xdem.spstats.nd_binning(values=diff.data.flatten(),list_var=[slope.flatten()],list_var_names=['slope'], statistics=['count',percentile_80])

        # 2d binning
        df = xdem.spstats.nd_binning(values=diff.data.flatten(),list_var=[slope.flatten(),ref.data.flatten()],list_var_names=['slope','elevation'])

        # dataframe should contain two 1D binning of length 10 and one 2D binning of length 100
        assert df.shape[0] == (10 + 10 + 100)

        # nd binning
        df = xdem.spstats.nd_binning(values=diff.data.flatten(),list_var=[slope.flatten(),ref.data.flatten(),aspect.flatten()],list_var_names=['slope','elevation','aspect'])

        # dataframe should contain three 1D binning of length 10 and three 2D binning of length 100 and one 2D binning of length 1000
        assert df.shape[0] == (1000 + 3 * 100 + 3 * 10)

)
Loading