Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
This adds a number of tests for slicing() and statistical_inefficiency():
* Test that slicing() respects upper and lower time bounds (currently
  passes)
* Test that statistical_inefficiency() respects upper and lower time bounds
  when it is used without series to subsample (currently passes)
* Test that statistical_inefficiency() respects upper and lower time bounds
  when it is used with a series to subsample (currently fails)
* Test that first using slicing() on the data frame, then
  statistical_inefficiency() without time bounds yields the same results as
  a single call to statistical_inefficiency() with time bounds (currently
  fails)

Refs alchemistry#198
  • Loading branch information
ptmerz committed Jun 22, 2022
1 parent 9153bbd commit 888602b
Showing 1 changed file with 114 additions and 0 deletions.
114 changes: 114 additions & 0 deletions src/alchemlyb/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,29 @@ def slicer(self, *args, **kwargs):
def test_basic_slicing(self, data, size):
assert len(self.slicer(data, lower=1000, upper=34000, step=5)) == size

@pytest.mark.parametrize(
    ('data', 'lower', 'upper'),
    [
        (gmx_benzene_dHdl(), 1000, 34000),
        (gmx_benzene_u_nk(), 1000, 34000),
    ],
)
def test_lower_and_upper_bound(self, data, lower, upper):
    """Check that slicing keeps only rows with ``lower <= time <= upper``.

    The input must contain times on both sides of the window, otherwise
    the bound checks below would pass trivially. The length of the input
    is compared before and after the call to detect in-place changes.
    """
    n_rows_before = len(data)

    # Precondition: there is something to cut away at both ends.
    input_times = data.reset_index()['time']
    assert (input_times < lower).any()
    assert (input_times > upper).any()

    # Every time value that survives slicing must lie inside the window.
    result = self.slicer(data, lower=lower, upper=upper, step=5)
    result_times = result.reset_index()['time']
    assert (result_times >= lower).all()
    assert (result_times <= upper).all()

    # The caller's frame must keep its original length.
    assert len(data) == n_rows_before

@pytest.mark.parametrize('data', [gmx_benzene_dHdl(),
gmx_benzene_u_nk()])
def test_disordered_exception(self, data):
Expand Down Expand Up @@ -213,6 +236,97 @@ def test_raise_ValueError_for_mismatched_data(self, series):
with pytest.raises(ValueError):
self.slicer(data, series=series)

@pytest.mark.parametrize(
    ('data', 'lower', 'upper'),
    [
        (gmx_benzene_dHdl(), 1000, 34000),
        (gmx_benzene_u_nk(), 1000, 34000),
    ],
)
def test_lower_and_upper_bound_slicer(self, data, lower, upper):
    """Check the time window is honoured when no series is supplied.

    With ``series=None`` the preprocessor under test is expected to act
    like plain slicing, so no returned row may have a time outside
    ``[lower, upper]``. The length of the input is compared before and
    after the call to detect in-place changes.
    """
    n_rows_before = len(data)

    # Precondition: there is something to cut away at both ends.
    input_times = data.reset_index()['time']
    assert (input_times < lower).any()
    assert (input_times > upper).any()

    # Every time value that survives must lie inside the window.
    result = self.slicer(data, series=None, lower=lower, upper=upper, step=5)
    result_times = result.reset_index()['time']
    assert (result_times >= lower).all()
    assert (result_times <= upper).all()

    # The caller's frame must keep its original length.
    assert len(data) == n_rows_before

@pytest.mark.parametrize(
    ('data', 'lower', 'upper'),
    [
        (gmx_benzene_dHdl(), 1000, 34000),
        (gmx_benzene_u_nk(), 1000, 34000),
    ],
)
def test_lower_and_upper_bound_inefficiency(self, data, lower, upper):
    """Check the time window is honoured when a series is supplied.

    The row-wise sum of ``data`` is passed as the series that drives the
    subsampling. No returned row may have a time outside
    ``[lower, upper]``. The length of the input is compared before and
    after the call to detect in-place changes.
    """
    n_rows_before = len(data)

    # Precondition: there is something to cut away at both ends.
    input_times = data.reset_index()['time']
    assert (input_times < lower).any()
    assert (input_times > upper).any()

    # Every time value that survives subsampling must lie inside the window.
    result = self.slicer(
        data,
        series=data.sum(axis=1),
        lower=lower,
        upper=upper,
        step=5,
    )
    result_times = result.reset_index()['time']
    assert (result_times >= lower).all()
    assert (result_times <= upper).all()

    # The caller's frame must keep its original length.
    assert len(data) == n_rows_before

@pytest.mark.parametrize(
    ('data', 'lower', 'upper', 'conservative'),
    [
        (gmx_benzene_dHdl(), 1000, 34000, True),
        (gmx_benzene_u_nk(), 1000, 34000, True),
        (gmx_benzene_dHdl(), 1000, 34000, False),
        (gmx_benzene_u_nk(), 1000, 34000, False),
    ],
)
def test_slicing_inefficiency_equivalence(self, data, lower, upper, conservative):
    """Check that "slice, then subsample" equals "subsample with bounds".

    Two routes are compared element-wise:

    1. ``slicing`` is applied to ``data`` first, and the result is
       subsampled without any time bounds.
    2. ``data`` is subsampled directly with ``lower``/``upper`` passed on.

    In both routes the series is the row-wise sum of the frame being
    subsampled.
    """
    n_rows_before = len(data)

    # Precondition: there is something to cut away at both ends.
    input_times = data.reset_index()['time']
    assert (input_times < lower).any()
    assert (input_times > upper).any()

    # Route 1: restrict the frame to the window, then subsample it.
    presliced = slicing(data, lower=lower, upper=upper)
    via_presliced = self.slicer(
        presliced,
        series=presliced.sum(axis=1),
        conservative=conservative,
    )

    # The caller's frame must keep its original length.
    assert len(data) == n_rows_before

    # Route 2: hand the window straight to the subsampling call.
    via_bounds = self.slicer(
        data,
        series=data.sum(axis=1),
        lower=lower,
        upper=upper,
        conservative=conservative,
    )

    assert (via_presliced == via_bounds).all(axis=None)


class TestEquilibriumDetection(TestSlicing, CorrelatedPreprocessors):

Expand Down

0 comments on commit 888602b

Please sign in to comment.